Show More
@@ -1,113 +1,98 b'' | |||
|
1 | 1 | #!/usr/bin/env python |
|
2 | 2 | # encoding: utf-8 |
|
3 | 3 | # search controller for pylons |
|
4 | 4 | # Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com> |
|
5 | 5 | # |
|
6 | 6 | # This program is free software; you can redistribute it and/or |
|
7 | 7 | # modify it under the terms of the GNU General Public License |
|
8 | 8 | # as published by the Free Software Foundation; version 2 |
|
9 | 9 | # of the License or (at your option) any later version of the license. |
|
10 | 10 | # |
|
11 | 11 | # This program is distributed in the hope that it will be useful, |
|
12 | 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 | 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
14 | 14 | # GNU General Public License for more details. |
|
15 | 15 | # |
|
16 | 16 | # You should have received a copy of the GNU General Public License |
|
17 | 17 | # along with this program; if not, write to the Free Software |
|
18 | 18 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
|
19 | 19 | # MA 02110-1301, USA. |
|
20 | 20 | """ |
|
21 | 21 | Created on Aug 7, 2010 |
|
22 | 22 | search controller for pylons |
|
23 | 23 | @author: marcink |
|
24 | 24 | """ |
|
25 | 25 | from pylons import request, response, session, tmpl_context as c, url |
|
26 | 26 | from pylons.controllers.util import abort, redirect |
|
27 | 27 | from pylons_app.lib.auth import LoginRequired |
|
28 | 28 | from pylons_app.lib.base import BaseController, render |
|
29 |
from pylons_app.lib.indexers import |
|
|
30 |
from webhelpers. |
|
|
31 | from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter, \ | |
|
32 | ContextFragmenter | |
|
29 | from pylons_app.lib.indexers import IDX_LOCATION, SCHEMA, IDX_NAME, ResultWrapper | |
|
30 | from webhelpers.paginate import Page | |
|
31 | from webhelpers.util import update_params | |
|
33 | 32 | from pylons.i18n.translation import _ |
|
34 | 33 | from whoosh.index import open_dir, EmptyIndexError |
|
35 | 34 | from whoosh.qparser import QueryParser, QueryParserError |
|
36 | 35 | from whoosh.query import Phrase |
|
37 | 36 | import logging |
|
38 | 37 | import traceback |
|
39 | 38 | |
|
40 | 39 | log = logging.getLogger(__name__) |
|
41 | 40 | |
|
42 | 41 | class SearchController(BaseController): |
|
43 | 42 | |
|
44 | 43 | @LoginRequired() |
|
45 | 44 | def __before__(self): |
|
46 | 45 | super(SearchController, self).__before__() |
|
47 | 46 | |
|
48 | ||
|
49 | 47 | def index(self): |
|
50 | 48 | c.formated_results = [] |
|
51 | 49 | c.runtime = '' |
|
52 | search_items = set() | |
|
53 | 50 | c.cur_query = request.GET.get('q', None) |
|
54 | 51 | if c.cur_query: |
|
55 | 52 | cur_query = c.cur_query.lower() |
|
56 | 53 | |
|
57 | ||
|
58 | 54 | if c.cur_query: |
|
55 | p = int(request.params.get('page', 1)) | |
|
56 | highlight_items = set() | |
|
59 | 57 | try: |
|
60 | 58 | idx = open_dir(IDX_LOCATION, indexname=IDX_NAME) |
|
61 | 59 | searcher = idx.searcher() |
|
62 | 60 | |
|
63 | 61 | qp = QueryParser("content", schema=SCHEMA) |
|
64 | 62 | try: |
|
65 | 63 | query = qp.parse(unicode(cur_query)) |
|
66 | 64 | |
|
67 | 65 | if isinstance(query, Phrase): |
|
68 |
|
|
|
66 | highlight_items.update(query.words) | |
|
69 | 67 | else: |
|
70 | 68 | for i in query.all_terms(): |
|
71 |
|
|
|
69 | if i[0] == 'content': | |
|
70 | highlight_items.add(i[1]) | |
|
71 | ||
|
72 | matcher = query.matcher(searcher) | |
|
72 | 73 |
|
|
73 | 74 | log.debug(query) |
|
74 |
log.debug( |
|
|
75 | log.debug(highlight_items) | |
|
75 | 76 | results = searcher.search(query) |
|
77 | res_ln = len(results) | |
|
76 | 78 | c.runtime = '%s results (%.3f seconds)' \ |
|
77 |
% ( |
|
|
78 | ||
|
79 | analyzer = ANALYZER | |
|
80 | formatter = HtmlFormatter('span', | |
|
81 | between='\n<span class="break">...</span>\n') | |
|
82 | ||
|
83 | #how the parts are splitted within the same text part | |
|
84 | fragmenter = SimpleFragmenter(200) | |
|
85 | #fragmenter = ContextFragmenter(search_items) | |
|
79 | % (res_ln, results.runtime) | |
|
86 | 80 | |
|
87 | for res in results: | |
|
88 | d = {} | |
|
89 | d.update(res) | |
|
90 | hl = highlight(escape(res['content']), search_items, | |
|
91 | analyzer=analyzer, | |
|
92 | fragmenter=fragmenter, | |
|
93 | formatter=formatter, | |
|
94 | top=5) | |
|
95 | f_path = res['path'][res['path'].find(res['repository']) \ | |
|
96 | + len(res['repository']):].lstrip('/') | |
|
97 | d.update({'content_short':hl, | |
|
98 | 'f_path':f_path}) | |
|
99 | #del d['content'] | |
|
100 | c.formated_results.append(d) | |
|
81 | def url_generator(**kw): | |
|
82 | return update_params("?q=%s" % c.cur_query, **kw) | |
|
83 | ||
|
84 | c.formated_results = Page( | |
|
85 | ResultWrapper(searcher, matcher, highlight_items), | |
|
86 | page=p, item_count=res_ln, | |
|
87 | items_per_page=10, url=url_generator) | |
|
101 | 88 |
|
|
102 | 89 | except QueryParserError: |
|
103 | 90 | c.runtime = _('Invalid search query. Try quoting it.') |
|
104 | ||
|
91 | searcher.close() | |
|
105 | 92 | except (EmptyIndexError, IOError): |
|
106 | 93 | log.error(traceback.format_exc()) |
|
107 | 94 | log.error('Empty Index data') |
|
108 | 95 | c.runtime = _('There is no index to search in. Please run whoosh indexer') |
|
109 | 96 | |
|
110 | ||
|
111 | ||
|
112 | 97 | # Return a rendered template |
|
113 | 98 | return render('/search/search.html') |
@@ -1,41 +1,140 b'' | |||
|
1 | import sys | |
|
1 | from os.path import dirname as dn, join as jn | |
|
2 | from pidlock import LockHeld, DaemonLock | |
|
3 | from pylons_app.config.environment import load_environment | |
|
4 | from pylons_app.model.hg_model import HgModel | |
|
5 | from shutil import rmtree | |
|
6 | from webhelpers.html.builder import escape | |
|
7 | from vcs.utils.lazy import LazyProperty | |
|
8 | ||
|
9 | from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter | |
|
10 | from whoosh.fields import TEXT, ID, STORED, Schema, FieldType | |
|
11 | from whoosh.index import create_in, open_dir | |
|
12 | from whoosh.formats import Characters | |
|
13 | from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter | |
|
14 | ||
|
2 | 15 | import os |
|
3 | from pidlock import LockHeld, DaemonLock | |
|
16 | import sys | |
|
4 | 17 | import traceback |
|
5 | 18 | |
|
6 | from os.path import dirname as dn | |
|
7 | from os.path import join as jn | |
|
19 | ||
|
8 | 20 | |
|
9 | 21 | #to get the pylons_app import |
|
10 | 22 | sys.path.append(dn(dn(dn(os.path.realpath(__file__))))) |
|
11 | 23 | |
|
12 | from pylons_app.config.environment import load_environment | |
|
13 | from pylons_app.model.hg_model import HgModel | |
|
14 | from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter | |
|
15 | from whoosh.fields import TEXT, ID, STORED, Schema | |
|
16 | from whoosh.index import create_in, open_dir | |
|
17 | from shutil import rmtree | |
|
18 | 24 | |
|
19 | 25 | #LOCATION WE KEEP THE INDEX |
|
20 | 26 | IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index') |
|
21 | 27 | |
|
22 | 28 | #EXTENSIONS WE WANT TO INDEX CONTENT OF |
|
23 | 29 |
INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c', |
|
24 |
'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl', |
|
|
25 |
'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp', |
|
|
30 | 'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl', | |
|
31 | 'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp', | |
|
26 | 32 |
'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3', |
|
27 | 33 |
'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql', |
|
28 | 34 |
'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml','xsl','xslt', |
|
29 | 35 | 'yaws'] |
|
30 | 36 | |
|
31 | 37 | #CUSTOM ANALYZER wordsplit + lowercase filter |
|
32 | 38 | ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter() |
|
33 | 39 | |
|
40 | ||
|
34 | 41 | #INDEX SCHEMA DEFINITION |
|
35 | 42 | SCHEMA = Schema(owner=TEXT(), |
|
36 | 43 | repository=TEXT(stored=True), |
|
37 | 44 | path=ID(stored=True, unique=True), |
|
38 |
content= |
|
|
45 | content=FieldType(format=Characters(ANALYZER), | |
|
46 | scorable=True, stored=True), | |
|
39 | 47 | modtime=STORED(),extension=TEXT(stored=True)) |
|
40 | 48 | |
|
41 | IDX_NAME = 'HG_INDEX' No newline at end of file | |
|
49 | ||
|
50 | IDX_NAME = 'HG_INDEX' | |
|
51 | FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n') | |
|
52 | FRAGMENTER = SimpleFragmenter(200) | |
|
53 | ||
|
54 | ||
|
55 | ||
|
56 | ||
|
57 | class ResultWrapper(object): | |
|
58 | def __init__(self, searcher, matcher, highlight_items): | |
|
59 | self.searcher = searcher | |
|
60 | self.matcher = matcher | |
|
61 | self.highlight_items = highlight_items | |
|
62 | self.fragment_size = 150 * 2 | |
|
63 | ||
|
64 | @LazyProperty | |
|
65 | def doc_ids(self): | |
|
66 | docs_id = [] | |
|
67 | while self.matcher.is_active(): | |
|
68 | docnum = self.matcher.id() | |
|
69 | docs_id.append(docnum) | |
|
70 | self.matcher.next() | |
|
71 | return docs_id | |
|
72 | ||
|
73 | def __str__(self): | |
|
74 | return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids)) | |
|
75 | ||
|
76 | def __repr__(self): | |
|
77 | return self.__str__() | |
|
78 | ||
|
79 | def __len__(self): | |
|
80 | return len(self.doc_ids) | |
|
81 | ||
|
82 | def __iter__(self): | |
|
83 | """ | |
|
84 | Allows iteration over results, and lazily generates content | |
|
85 | ||
|
86 | *Requires* implementation of ``__getitem__`` method. | |
|
87 | """ | |
|
88 | for docid in self.doc_ids: | |
|
89 | yield self.get_full_content(docid) | |
|
90 | ||
|
91 | def __getslice__(self, i, j): | |
|
92 | """ | |
|
93 | Slicing of resultWrapper | |
|
94 | """ | |
|
95 | slice = [] | |
|
96 | for docid in self.doc_ids[i:j]: | |
|
97 | slice.append(self.get_full_content(docid)) | |
|
98 | return slice | |
|
99 | ||
|
100 | ||
|
101 | def get_full_content(self, docid): | |
|
102 | res = self.searcher.stored_fields(docid) | |
|
103 | f_path = res['path'][res['path'].find(res['repository']) \ | |
|
104 | + len(res['repository']):].lstrip('/') | |
|
105 | ||
|
106 | content_short = ''.join(self.get_short_content(res)) | |
|
107 | res.update({'content_short':content_short, | |
|
108 | 'content_short_hl':self.highlight(content_short), | |
|
109 | 'f_path':f_path}) | |
|
110 | ||
|
111 | return res | |
|
112 | ||
|
113 | def get_short_content(self, res): | |
|
114 | """ | |
|
115 | Smart function that implements chunking the content | |
|
116 | but not overlap chunks so it doesn't highlight the same | |
|
117 | close occurrences twice. | |
|
118 | @param matcher: | |
|
119 | @param size: | |
|
120 | """ | |
|
121 | memory = [(0, 0)] | |
|
122 | for span in self.matcher.spans(): | |
|
123 | start = span.startchar or 0 | |
|
124 | end = span.endchar or 0 | |
|
125 | start_offseted = max(0, start - self.fragment_size) | |
|
126 | end_offseted = end + self.fragment_size | |
|
127 | print start_offseted, end_offseted | |
|
128 | if start_offseted < memory[-1][1]: | |
|
129 | start_offseted = memory[-1][1] | |
|
130 | memory.append((start_offseted, end_offseted,)) | |
|
131 | yield res["content"][start_offseted:end_offseted] | |
|
132 | ||
|
133 | def highlight(self, content, top=5): | |
|
134 | hl = highlight(escape(content), | |
|
135 | self.highlight_items, | |
|
136 | analyzer=ANALYZER, | |
|
137 | fragmenter=FRAGMENTER, | |
|
138 | formatter=FORMATTER, | |
|
139 | top=top) | |
|
140 | return hl |
@@ -1,69 +1,71 b'' | |||
|
1 | 1 | ## -*- coding: utf-8 -*- |
|
2 | 2 | <%inherit file="/base/base.html"/> |
|
3 | 3 | <%def name="title()"> |
|
4 | 4 | ${_('Search')}: ${c.cur_query} |
|
5 | 5 | </%def> |
|
6 | 6 | <%def name="breadcrumbs()"> |
|
7 | 7 | ${c.hg_app_name} |
|
8 | 8 | </%def> |
|
9 | 9 | <%def name="page_nav()"> |
|
10 | 10 | ${self.menu('home')} |
|
11 | 11 | </%def> |
|
12 | 12 | <%def name="main()"> |
|
13 | 13 | |
|
14 | 14 | <div class="box"> |
|
15 | 15 | <!-- box / title --> |
|
16 | 16 | <div class="title"> |
|
17 | 17 | <h5>${_('Search')}</h5> |
|
18 | 18 | </div> |
|
19 | 19 | <!-- end box / title --> |
|
20 | 20 | ${h.form('search',method='get')} |
|
21 | 21 | <div class="form"> |
|
22 | 22 | <div class="fields"> |
|
23 | 23 | |
|
24 | 24 | <div class="field "> |
|
25 | 25 | <div class="label"> |
|
26 | 26 | <label for="q">${_('Search:')}</label> |
|
27 | 27 | </div> |
|
28 | 28 | <div class="input"> |
|
29 | 29 | ${h.text('q',c.cur_query,class_="small")} |
|
30 | 30 | <div class="button highlight"> |
|
31 | 31 | <input type="submit" value="${_('Search')}" class="ui-button ui-widget ui-state-default ui-corner-all"/> |
|
32 | 32 | </div> |
|
33 | 33 | <div style="font-weight: bold;clear:both;padding: 5px">${c.runtime}</div> |
|
34 | 34 | </div> |
|
35 | 35 | </div> |
|
36 | 36 | </div> |
|
37 | 37 | </div> |
|
38 | 38 | ${h.end_form()} |
|
39 | 39 | |
|
40 | 40 | %for cnt,sr in enumerate(c.formated_results): |
|
41 | 41 | %if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(sr['repository'],'search results check'): |
|
42 | 42 | <div class="table"> |
|
43 | 43 | <div id="body${cnt}" class="codeblock"> |
|
44 | 44 | <div class="code-header"> |
|
45 | 45 | <div class="revision">${h.link_to(h.literal('%s » %s' % (sr['repository'],sr['f_path'])), |
|
46 | 46 | h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}</div> |
|
47 | 47 | </div> |
|
48 | 48 | <div class="code-body"> |
|
49 | <pre>${h.literal(sr['content_short'])}</pre> | |
|
49 | <pre>${h.literal(sr['content_short_hl'])}</pre> | |
|
50 | 50 | </div> |
|
51 | 51 | </div> |
|
52 | 52 | </div> |
|
53 | 53 | %else: |
|
54 | 54 | %if cnt == 0: |
|
55 | 55 | <div class="table"> |
|
56 | 56 | <div id="body${cnt}" class="codeblock"> |
|
57 | 57 | <div class="error">${_('Permission denied')}</div> |
|
58 | 58 | </div> |
|
59 | 59 | </div> |
|
60 | 60 | %endif |
|
61 | 61 | |
|
62 | 62 | %endif |
|
63 | 63 | %endfor |
|
64 | ||
|
65 | ||
|
66 | ||
|
64 | %if c.cur_query: | |
|
65 | <div class="pagination-wh pagination-left"> | |
|
66 | ${c.formated_results.pager('$link_previous ~2~ $link_next')} | |
|
67 | </div> | |
|
68 | %endif | |
|
67 | 69 | </div> |
|
68 | 70 | |
|
69 | 71 | </%def> |
@@ -1,49 +1,49 b'' | |||
|
1 | 1 | from pylons_app import get_version |
|
2 | 2 | try: |
|
3 | 3 | from setuptools import setup, find_packages |
|
4 | 4 | except ImportError: |
|
5 | 5 | from ez_setup import use_setuptools |
|
6 | 6 | use_setuptools() |
|
7 | 7 | from setuptools import setup, find_packages |
|
8 | 8 | |
|
9 | 9 | setup( |
|
10 | 10 | name='HgApp-%s'%get_version(), |
|
11 | 11 | version=get_version(), |
|
12 | 12 | description='Mercurial repository serving and browsing app', |
|
13 | 13 | keywords='mercurial web hgwebdir replacement serving hgweb', |
|
14 | 14 | license='BSD', |
|
15 | 15 | author='marcin kuzminski', |
|
16 | 16 | author_email='marcin@python-works.com', |
|
17 | 17 | url='http://hg.python-works.com', |
|
18 | 18 | install_requires=[ |
|
19 | 19 | "Pylons>=1.0.0", |
|
20 | 20 | "SQLAlchemy>=0.6", |
|
21 | 21 | "babel", |
|
22 | 22 | "Mako>=0.3.2", |
|
23 |
"vcs>=0.1. |
|
|
23 | "vcs>=0.1.5", | |
|
24 | 24 | "pygments>=1.3.0", |
|
25 | 25 | "mercurial>=1.6", |
|
26 | 26 | "pysqlite", |
|
27 |
"whoosh==1.0.0b1 |
|
|
27 | "whoosh==1.0.0b16", | |
|
28 | 28 | "py-bcrypt", |
|
29 | 29 | "celery", |
|
30 | 30 | ], |
|
31 | 31 | setup_requires=["PasteScript>=1.6.3"], |
|
32 | 32 | packages=find_packages(exclude=['ez_setup']), |
|
33 | 33 | include_package_data=True, |
|
34 | 34 | test_suite='nose.collector', |
|
35 | 35 | package_data={'pylons_app': ['i18n/*/LC_MESSAGES/*.mo']}, |
|
36 | 36 | message_extractors={'pylons_app': [ |
|
37 | 37 | ('**.py', 'python', None), |
|
38 | 38 | ('templates/**.mako', 'mako', {'input_encoding': 'utf-8'}), |
|
39 | 39 | ('public/**', 'ignore', None)]}, |
|
40 | 40 | zip_safe=False, |
|
41 | 41 | paster_plugins=['PasteScript', 'Pylons'], |
|
42 | 42 | entry_points=""" |
|
43 | 43 | [paste.app_factory] |
|
44 | 44 | main = pylons_app.config.middleware:make_app |
|
45 | 45 | |
|
46 | 46 | [paste.app_install] |
|
47 | 47 | main = pylons.util:PylonsInstaller |
|
48 | 48 | """, |
|
49 | 49 | ) |
General Comments 0
You need to be logged in to leave comments.
Login now