Show More
@@ -26,10 +26,9 b' from pylons import request, response, se' | |||||
26 | from pylons.controllers.util import abort, redirect |
|
26 | from pylons.controllers.util import abort, redirect | |
27 | from pylons_app.lib.auth import LoginRequired |
|
27 | from pylons_app.lib.auth import LoginRequired | |
28 | from pylons_app.lib.base import BaseController, render |
|
28 | from pylons_app.lib.base import BaseController, render | |
29 |
from pylons_app.lib.indexers import |
|
29 | from pylons_app.lib.indexers import IDX_LOCATION, SCHEMA, IDX_NAME, ResultWrapper | |
30 |
from webhelpers. |
|
30 | from webhelpers.paginate import Page | |
31 | from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter, \ |
|
31 | from webhelpers.util import update_params | |
32 | ContextFragmenter |
|
|||
33 | from pylons.i18n.translation import _ |
|
32 | from pylons.i18n.translation import _ | |
34 | from whoosh.index import open_dir, EmptyIndexError |
|
33 | from whoosh.index import open_dir, EmptyIndexError | |
35 | from whoosh.qparser import QueryParser, QueryParserError |
|
34 | from whoosh.qparser import QueryParser, QueryParserError | |
@@ -45,69 +44,55 b' class SearchController(BaseController):' | |||||
45 | def __before__(self): |
|
44 | def __before__(self): | |
46 | super(SearchController, self).__before__() |
|
45 | super(SearchController, self).__before__() | |
47 |
|
46 | |||
48 |
|
||||
49 | def index(self): |
|
47 | def index(self): | |
50 | c.formated_results = [] |
|
48 | c.formated_results = [] | |
51 | c.runtime = '' |
|
49 | c.runtime = '' | |
52 | search_items = set() |
|
|||
53 | c.cur_query = request.GET.get('q', None) |
|
50 | c.cur_query = request.GET.get('q', None) | |
54 | if c.cur_query: |
|
51 | if c.cur_query: | |
55 | cur_query = c.cur_query.lower() |
|
52 | cur_query = c.cur_query.lower() | |
56 |
|
53 | |||
57 |
|
||||
58 | if c.cur_query: |
|
54 | if c.cur_query: | |
|
55 | p = int(request.params.get('page', 1)) | |||
|
56 | highlight_items = set() | |||
59 | try: |
|
57 | try: | |
60 | idx = open_dir(IDX_LOCATION, indexname=IDX_NAME) |
|
58 | idx = open_dir(IDX_LOCATION, indexname=IDX_NAME) | |
61 | searcher = idx.searcher() |
|
59 | searcher = idx.searcher() | |
62 |
|
60 | |||
63 | qp = QueryParser("content", schema=SCHEMA) |
|
61 | qp = QueryParser("content", schema=SCHEMA) | |
64 | try: |
|
62 | try: | |
65 | query = qp.parse(unicode(cur_query)) |
|
63 | query = qp.parse(unicode(cur_query)) | |
66 |
|
64 | |||
67 | if isinstance(query, Phrase): |
|
65 | if isinstance(query, Phrase): | |
68 |
|
|
66 | highlight_items.update(query.words) | |
69 | else: |
|
67 | else: | |
70 | for i in query.all_terms(): |
|
68 | for i in query.all_terms(): | |
71 |
|
|
69 | if i[0] == 'content': | |
72 |
|
70 | highlight_items.add(i[1]) | ||
73 | log.debug(query) |
|
|||
74 | log.debug(search_items) |
|
|||
75 | results = searcher.search(query) |
|
|||
76 | c.runtime = '%s results (%.3f seconds)' \ |
|
|||
77 | % (len(results), results.runtime) |
|
|||
78 |
|
71 | |||
79 | analyzer = ANALYZER |
|
72 | matcher = query.matcher(searcher) | |
80 | formatter = HtmlFormatter('span', |
|
|||
81 | between='\n<span class="break">...</span>\n') |
|
|||
82 |
|
||||
83 | #how the parts are splitted within the same text part |
|
|||
84 | fragmenter = SimpleFragmenter(200) |
|
|||
85 | #fragmenter = ContextFragmenter(search_items) |
|
|||
86 |
|
73 | |||
87 | for res in results: |
|
74 | log.debug(query) | |
88 | d = {} |
|
75 | log.debug(highlight_items) | |
89 | d.update(res) |
|
76 | results = searcher.search(query) | |
90 | hl = highlight(escape(res['content']), search_items, |
|
77 | res_ln = len(results) | |
91 | analyzer=analyzer, |
|
78 | c.runtime = '%s results (%.3f seconds)' \ | |
92 | fragmenter=fragmenter, |
|
79 | % (res_ln, results.runtime) | |
93 | formatter=formatter, |
|
80 | ||
94 | top=5) |
|
81 | def url_generator(**kw): | |
95 | f_path = res['path'][res['path'].find(res['repository']) \ |
|
82 | return update_params("?q=%s" % c.cur_query, **kw) | |
96 | + len(res['repository']):].lstrip('/') |
|
83 | ||
97 | d.update({'content_short':hl, |
|
84 | c.formated_results = Page( | |
98 | 'f_path':f_path}) |
|
85 | ResultWrapper(searcher, matcher, highlight_items), | |
99 |
|
|
86 | page=p, item_count=res_ln, | |
100 | c.formated_results.append(d) |
|
87 | items_per_page=10, url=url_generator) | |
101 |
|
|
88 | ||
102 | except QueryParserError: |
|
89 | except QueryParserError: | |
103 | c.runtime = _('Invalid search query. Try quoting it.') |
|
90 | c.runtime = _('Invalid search query. Try quoting it.') | |
104 |
|
91 | searcher.close() | ||
105 | except (EmptyIndexError, IOError): |
|
92 | except (EmptyIndexError, IOError): | |
106 | log.error(traceback.format_exc()) |
|
93 | log.error(traceback.format_exc()) | |
107 | log.error('Empty Index data') |
|
94 | log.error('Empty Index data') | |
108 | c.runtime = _('There is no index to search in. Please run whoosh indexer') |
|
95 | c.runtime = _('There is no index to search in. Please run whoosh indexer') | |
109 |
|
96 | |||
110 |
|
||||
111 |
|
||||
112 | # Return a rendered template |
|
97 | # Return a rendered template | |
113 | return render('/search/search.html') |
|
98 | return render('/search/search.html') |
@@ -1,41 +1,140 b'' | |||||
1 | import sys |
|
1 | from os.path import dirname as dn, join as jn | |
|
2 | from pidlock import LockHeld, DaemonLock | |||
|
3 | from pylons_app.config.environment import load_environment | |||
|
4 | from pylons_app.model.hg_model import HgModel | |||
|
5 | from shutil import rmtree | |||
|
6 | from webhelpers.html.builder import escape | |||
|
7 | from vcs.utils.lazy import LazyProperty | |||
|
8 | ||||
|
9 | from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter | |||
|
10 | from whoosh.fields import TEXT, ID, STORED, Schema, FieldType | |||
|
11 | from whoosh.index import create_in, open_dir | |||
|
12 | from whoosh.formats import Characters | |||
|
13 | from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter | |||
|
14 | ||||
2 | import os |
|
15 | import os | |
3 | from pidlock import LockHeld, DaemonLock |
|
16 | import sys | |
4 | import traceback |
|
17 | import traceback | |
5 |
|
18 | |||
6 | from os.path import dirname as dn |
|
19 | ||
7 | from os.path import join as jn |
|
|||
8 |
|
20 | |||
9 | #to get the pylons_app import |
|
21 | #to get the pylons_app import | |
10 | sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) |
|
22 | sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) | |
11 |
|
23 | |||
12 | from pylons_app.config.environment import load_environment |
|
|||
13 | from pylons_app.model.hg_model import HgModel |
|
|||
14 | from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter |
|
|||
15 | from whoosh.fields import TEXT, ID, STORED, Schema |
|
|||
16 | from whoosh.index import create_in, open_dir |
|
|||
17 | from shutil import rmtree |
|
|||
18 |
|
24 | |||
19 | #LOCATION WE KEEP THE INDEX |
|
25 | #LOCATION WE KEEP THE INDEX | |
20 | IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index') |
|
26 | IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index') | |
21 |
|
27 | |||
22 | #EXTENSIONS WE WANT TO INDEX CONTENT OFF |
|
28 | #EXTENSIONS WE WANT TO INDEX CONTENT OFF | |
23 |
INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c', |
|
29 | INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c', | |
24 |
'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl', |
|
30 | 'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl', | |
25 |
'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp', |
|
31 | 'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp', | |
26 |
'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3', |
|
32 | 'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3', | |
27 |
'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql', |
|
33 | 'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql', | |
28 |
'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml','xsl','xslt', |
|
34 | 'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt', | |
29 | 'yaws'] |
|
35 | 'yaws'] | |
30 |
|
36 | |||
31 | #CUSTOM ANALYZER wordsplit + lowercase filter |
|
37 | #CUSTOM ANALYZER wordsplit + lowercase filter | |
32 | ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() |
|
38 | ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() | |
33 |
|
39 | |||
|
40 | ||||
34 | #INDEX SCHEMA DEFINITION |
|
41 | #INDEX SCHEMA DEFINITION | |
35 | SCHEMA = Schema(owner=TEXT(), |
|
42 | SCHEMA = Schema(owner=TEXT(), | |
36 | repository=TEXT(stored=True), |
|
43 | repository=TEXT(stored=True), | |
37 | path=ID(stored=True, unique=True), |
|
44 | path=ID(stored=True, unique=True), | |
38 |
content= |
|
45 | content=FieldType(format=Characters(ANALYZER), | |
39 | modtime=STORED(),extension=TEXT(stored=True)) |
|
46 | scorable=True, stored=True), | |
|
47 | modtime=STORED(), extension=TEXT(stored=True)) | |||
|
48 | ||||
|
49 | ||||
|
50 | IDX_NAME = 'HG_INDEX' | |||
|
51 | FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n') | |||
|
52 | FRAGMENTER = SimpleFragmenter(200) | |||
|
53 | ||||
|
54 | ||||
|
55 | ||||
|
56 | ||||
|
57 | class ResultWrapper(object): | |||
|
58 | def __init__(self, searcher, matcher, highlight_items): | |||
|
59 | self.searcher = searcher | |||
|
60 | self.matcher = matcher | |||
|
61 | self.highlight_items = highlight_items | |||
|
62 | self.fragment_size = 150 * 2 | |||
|
63 | ||||
|
64 | @LazyProperty | |||
|
65 | def doc_ids(self): | |||
|
66 | docs_id = [] | |||
|
67 | while self.matcher.is_active(): | |||
|
68 | docnum = self.matcher.id() | |||
|
69 | docs_id.append(docnum) | |||
|
70 | self.matcher.next() | |||
|
71 | return docs_id | |||
|
72 | ||||
|
73 | def __str__(self): | |||
|
74 | return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids)) | |||
|
75 | ||||
|
76 | def __repr__(self): | |||
|
77 | return self.__str__() | |||
|
78 | ||||
|
79 | def __len__(self): | |||
|
80 | return len(self.doc_ids) | |||
|
81 | ||||
|
82 | def __iter__(self): | |||
|
83 | """ | |||
|
84 | Allows Iteration over results,and lazy generate content | |||
|
85 | ||||
|
86 | *Requires* implementation of ``__getitem__`` method. | |||
|
87 | """ | |||
|
88 | for docid in self.doc_ids: | |||
|
89 | yield self.get_full_content(docid) | |||
40 |
|
90 | |||
41 | IDX_NAME = 'HG_INDEX' No newline at end of file |
|
91 | def __getslice__(self, i, j): | |
|
92 | """ | |||
|
93 | Slicing of resultWrapper | |||
|
94 | """ | |||
|
95 | slice = [] | |||
|
96 | for docid in self.doc_ids[i:j]: | |||
|
97 | slice.append(self.get_full_content(docid)) | |||
|
98 | return slice | |||
|
99 | ||||
|
100 | ||||
|
101 | def get_full_content(self, docid): | |||
|
102 | res = self.searcher.stored_fields(docid) | |||
|
103 | f_path = res['path'][res['path'].find(res['repository']) \ | |||
|
104 | + len(res['repository']):].lstrip('/') | |||
|
105 | ||||
|
106 | content_short = ''.join(self.get_short_content(res)) | |||
|
107 | res.update({'content_short':content_short, | |||
|
108 | 'content_short_hl':self.highlight(content_short), | |||
|
109 | 'f_path':f_path}) | |||
|
110 | ||||
|
111 | return res | |||
|
112 | ||||
|
113 | def get_short_content(self, res): | |||
|
114 | """ | |||
|
115 | Smart function that implements chunking the content | |||
|
116 | but not overlap chunks so it doesn't highlight the same | |||
|
117 | close occurences twice. | |||
|
118 | @param matcher: | |||
|
119 | @param size: | |||
|
120 | """ | |||
|
121 | memory = [(0, 0)] | |||
|
122 | for span in self.matcher.spans(): | |||
|
123 | start = span.startchar or 0 | |||
|
124 | end = span.endchar or 0 | |||
|
125 | start_offseted = max(0, start - self.fragment_size) | |||
|
126 | end_offseted = end + self.fragment_size | |||
|
127 | print start_offseted, end_offseted | |||
|
128 | if start_offseted < memory[-1][1]: | |||
|
129 | start_offseted = memory[-1][1] | |||
|
130 | memory.append((start_offseted, end_offseted,)) | |||
|
131 | yield res["content"][start_offseted:end_offseted] | |||
|
132 | ||||
|
133 | def highlight(self, content, top=5): | |||
|
134 | hl = highlight(escape(content), | |||
|
135 | self.highlight_items, | |||
|
136 | analyzer=ANALYZER, | |||
|
137 | fragmenter=FRAGMENTER, | |||
|
138 | formatter=FORMATTER, | |||
|
139 | top=top) | |||
|
140 | return hl |
@@ -46,7 +46,7 b'' | |||||
46 | h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}</div> |
|
46 | h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}</div> | |
47 | </div> |
|
47 | </div> | |
48 | <div class="code-body"> |
|
48 | <div class="code-body"> | |
49 | <pre>${h.literal(sr['content_short'])}</pre> |
|
49 | <pre>${h.literal(sr['content_short_hl'])}</pre> | |
50 | </div> |
|
50 | </div> | |
51 | </div> |
|
51 | </div> | |
52 | </div> |
|
52 | </div> | |
@@ -59,11 +59,13 b'' | |||||
59 | </div> |
|
59 | </div> | |
60 | %endif |
|
60 | %endif | |
61 |
|
61 | |||
62 | %endif |
|
62 | %endif | |
63 | %endfor |
|
63 | %endfor | |
64 |
|
64 | %if c.cur_query: | ||
65 |
|
65 | <div class="pagination-wh pagination-left"> | ||
66 |
|
66 | ${c.formated_results.pager('$link_previous ~2~ $link_next')} | ||
|
67 | </div> | |||
|
68 | %endif | |||
67 | </div> |
|
69 | </div> | |
68 |
|
70 | |||
69 | </%def> |
|
71 | </%def> |
@@ -7,7 +7,7 b' except ImportError:' | |||||
7 | from setuptools import setup, find_packages |
|
7 | from setuptools import setup, find_packages | |
8 |
|
8 | |||
9 | setup( |
|
9 | setup( | |
10 | name='HgApp-%s'%get_version(), |
|
10 | name='HgApp-%s' % get_version(), | |
11 | version=get_version(), |
|
11 | version=get_version(), | |
12 | description='Mercurial repository serving and browsing app', |
|
12 | description='Mercurial repository serving and browsing app', | |
13 | keywords='mercurial web hgwebdir replacement serving hgweb', |
|
13 | keywords='mercurial web hgwebdir replacement serving hgweb', | |
@@ -20,11 +20,11 b' setup(' | |||||
20 | "SQLAlchemy>=0.6", |
|
20 | "SQLAlchemy>=0.6", | |
21 | "babel", |
|
21 | "babel", | |
22 | "Mako>=0.3.2", |
|
22 | "Mako>=0.3.2", | |
23 |
"vcs>=0.1. |
|
23 | "vcs>=0.1.5", | |
24 | "pygments>=1.3.0", |
|
24 | "pygments>=1.3.0", | |
25 | "mercurial>=1.6", |
|
25 | "mercurial>=1.6", | |
26 | "pysqlite", |
|
26 | "pysqlite", | |
27 |
"whoosh==1.0.0b1 |
|
27 | "whoosh==1.0.0b16", | |
28 | "py-bcrypt", |
|
28 | "py-bcrypt", | |
29 | "celery", |
|
29 | "celery", | |
30 | ], |
|
30 | ], |
General Comments 0
You need to be logged in to leave comments.
Login now