##// END OF EJS Templates
bumped whoosh to 2.3.X series...
marcink -
r1995:b6c902d8 beta
parent child Browse files
Show More
@@ -5,7 +5,7 b' formencode==1.2.4'
5 SQLAlchemy==0.7.4
5 SQLAlchemy==0.7.4
6 Mako==0.5.0
6 Mako==0.5.0
7 pygments>=1.4
7 pygments>=1.4
8 whoosh<1.8
8 whoosh<2.4
9 celery>=2.2.5,<2.3
9 celery>=2.2.5,<2.3
10 babel
10 babel
11 python-dateutil>=1.5.0,<2.0.0
11 python-dateutil>=1.5.0,<2.0.0
@@ -44,7 +44,7 b' requirements = ['
44 "SQLAlchemy==0.7.4",
44 "SQLAlchemy==0.7.4",
45 "Mako==0.5.0",
45 "Mako==0.5.0",
46 "pygments>=1.4",
46 "pygments>=1.4",
47 "whoosh<1.8",
47 "whoosh<2.4",
48 "celery>=2.2.5,<2.3",
48 "celery>=2.2.5,<2.3",
49 "babel",
49 "babel",
50 "python-dateutil>=1.5.0,<2.0.0",
50 "python-dateutil>=1.5.0,<2.0.0",
@@ -26,7 +26,7 b' import logging'
26 import traceback
26 import traceback
27
27
28 from pylons.i18n.translation import _
28 from pylons.i18n.translation import _
29 from pylons import request, config, session, tmpl_context as c
29 from pylons import request, config, tmpl_context as c
30
30
31 from rhodecode.lib.auth import LoginRequired
31 from rhodecode.lib.auth import LoginRequired
32 from rhodecode.lib.base import BaseController, render
32 from rhodecode.lib.base import BaseController, render
@@ -76,7 +76,7 b' class SearchController(BaseController):'
76 cur_query = u'repository:%s %s' % (c.repo_name, cur_query)
76 cur_query = u'repository:%s %s' % (c.repo_name, cur_query)
77 try:
77 try:
78 query = qp.parse(unicode(cur_query))
78 query = qp.parse(unicode(cur_query))
79
79 # extract words for highlight
80 if isinstance(query, Phrase):
80 if isinstance(query, Phrase):
81 highlight_items.update(query.words)
81 highlight_items.update(query.words)
82 elif isinstance(query, Prefix):
82 elif isinstance(query, Prefix):
@@ -92,18 +92,22 b' class SearchController(BaseController):'
92 log.debug(highlight_items)
92 log.debug(highlight_items)
93 results = searcher.search(query)
93 results = searcher.search(query)
94 res_ln = len(results)
94 res_ln = len(results)
95 c.runtime = '%s results (%.3f seconds)' \
95 c.runtime = '%s results (%.3f seconds)' % (
96 % (res_ln, results.runtime)
96 res_ln, results.runtime
97 )
97
98
98 def url_generator(**kw):
99 def url_generator(**kw):
99 return update_params("?q=%s&type=%s" \
100 return update_params("?q=%s&type=%s" \
100 % (c.cur_query, c.cur_search), **kw)
101 % (c.cur_query, c.cur_search), **kw)
101
102
102 c.formated_results = Page(
103 c.formated_results = Page(
103 ResultWrapper(search_type, searcher, matcher,
104 ResultWrapper(search_type, searcher, matcher,
104 highlight_items),
105 highlight_items),
105 page=p, item_count=res_ln,
106 page=p,
106 items_per_page=10, url=url_generator)
107 item_count=res_ln,
108 items_per_page=10,
109 url=url_generator
110 )
107
111
108 except QueryParserError:
112 except QueryParserError:
109 c.runtime = _('Invalid search query. Try quoting it.')
113 c.runtime = _('Invalid search query. Try quoting it.')
@@ -117,5 +121,6 b' class SearchController(BaseController):'
117 log.error(traceback.format_exc())
121 log.error(traceback.format_exc())
118 c.runtime = _('An error occurred during this search operation')
122 c.runtime = _('An error occurred during this search operation')
119
123
124
120 # Return a rendered template
125 # Return a rendered template
121 return render('/search/search.html')
126 return render('/search/search.html')
@@ -37,38 +37,39 b' from whoosh.analysis import RegexTokeniz'
37 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
37 from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
38 from whoosh.index import create_in, open_dir
38 from whoosh.index import create_in, open_dir
39 from whoosh.formats import Characters
39 from whoosh.formats import Characters
40 from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
40 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
41
41
42 from webhelpers.html.builder import escape
42 from webhelpers.html.builder import escape
43 from sqlalchemy import engine_from_config
43 from sqlalchemy import engine_from_config
44 from vcs.utils.lazy import LazyProperty
45
44
46 from rhodecode.model import init_model
45 from rhodecode.model import init_model
47 from rhodecode.model.scm import ScmModel
46 from rhodecode.model.scm import ScmModel
48 from rhodecode.model.repo import RepoModel
47 from rhodecode.model.repo import RepoModel
49 from rhodecode.config.environment import load_environment
48 from rhodecode.config.environment import load_environment
50 from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
49 from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP, LazyProperty
51 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
50 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
52
51
53 #EXTENSIONS WE WANT TO INDEX CONTENT OFF
52 # EXTENSIONS WE WANT TO INDEX CONTENT OFF
54 INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
53 INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
55
54
56 #CUSTOM ANALYZER wordsplit + lowercase filter
55 # CUSTOM ANALYZER wordsplit + lowercase filter
57 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
56 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
58
57
59
58
60 #INDEX SCHEMA DEFINITION
59 #INDEX SCHEMA DEFINITION
61 SCHEMA = Schema(owner=TEXT(),
60 SCHEMA = Schema(
62 repository=TEXT(stored=True),
61 owner=TEXT(),
63 path=TEXT(stored=True),
62 repository=TEXT(stored=True),
64 content=FieldType(format=Characters(ANALYZER),
63 path=TEXT(stored=True),
65 scorable=True, stored=True),
64 content=FieldType(format=Characters(), analyzer=ANALYZER,
66 modtime=STORED(), extension=TEXT(stored=True))
65 scorable=True, stored=True),
67
66 modtime=STORED(),
67 extension=TEXT(stored=True)
68 )
68
69
69 IDX_NAME = 'HG_INDEX'
70 IDX_NAME = 'HG_INDEX'
70 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
71 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
71 FRAGMENTER = SimpleFragmenter(200)
72 FRAGMENTER = ContextFragmenter(200)
72
73
73
74
74 class MakeIndex(BasePasterCommand):
75 class MakeIndex(BasePasterCommand):
@@ -136,7 +137,7 b' class ResultWrapper(object):'
136 self.searcher = searcher
137 self.searcher = searcher
137 self.matcher = matcher
138 self.matcher = matcher
138 self.highlight_items = highlight_items
139 self.highlight_items = highlight_items
139 self.fragment_size = 200 / 2
140 self.fragment_size = 200
140
141
141 @LazyProperty
142 @LazyProperty
142 def doc_ids(self):
143 def doc_ids(self):
@@ -172,10 +173,10 b' class ResultWrapper(object):'
172 """
173 """
173 i, j = key.start, key.stop
174 i, j = key.start, key.stop
174
175
175 slice = []
176 slices = []
176 for docid in self.doc_ids[i:j]:
177 for docid in self.doc_ids[i:j]:
177 slice.append(self.get_full_content(docid))
178 slices.append(self.get_full_content(docid))
178 return slice
179 return slices
179
180
180 def get_full_content(self, docid):
181 def get_full_content(self, docid):
181 res = self.searcher.stored_fields(docid[0])
182 res = self.searcher.stored_fields(docid[0])
@@ -183,9 +184,9 b' class ResultWrapper(object):'
183 + len(res['repository']):].lstrip('/')
184 + len(res['repository']):].lstrip('/')
184
185
185 content_short = self.get_short_content(res, docid[1])
186 content_short = self.get_short_content(res, docid[1])
186 res.update({'content_short':content_short,
187 res.update({'content_short': content_short,
187 'content_short_hl':self.highlight(content_short),
188 'content_short_hl': self.highlight(content_short),
188 'f_path':f_path})
189 'f_path': f_path})
189
190
190 return res
191 return res
191
192
@@ -217,10 +218,12 b' class ResultWrapper(object):'
217 def highlight(self, content, top=5):
218 def highlight(self, content, top=5):
218 if self.search_type != 'content':
219 if self.search_type != 'content':
219 return ''
220 return ''
220 hl = highlight(escape(content),
221 hl = highlight(
221 self.highlight_items,
222 text=escape(content),
222 analyzer=ANALYZER,
223 terms=self.highlight_items,
223 fragmenter=FRAGMENTER,
224 analyzer=ANALYZER,
224 formatter=FORMATTER,
225 fragmenter=FRAGMENTER,
225 top=top)
226 formatter=FORMATTER,
227 top=top
228 )
226 return hl
229 return hl
@@ -49,7 +49,6 b' from vcs.exceptions import ChangesetErro'
49 from whoosh.index import create_in, open_dir
49 from whoosh.index import create_in, open_dir
50
50
51
51
52
53 log = logging.getLogger('whooshIndexer')
52 log = logging.getLogger('whooshIndexer')
54 # create logger
53 # create logger
55 log.setLevel(logging.DEBUG)
54 log.setLevel(logging.DEBUG)
@@ -68,12 +67,13 b' ch.setFormatter(formatter)'
68 # add ch to logger
67 # add ch to logger
69 log.addHandler(ch)
68 log.addHandler(ch)
70
69
70
71 class WhooshIndexingDaemon(object):
71 class WhooshIndexingDaemon(object):
72 """
72 """
73 Daemon for atomic jobs
73 Daemon for atomic jobs
74 """
74 """
75
75
76 def __init__(self, indexname='HG_INDEX', index_location=None,
76 def __init__(self, indexname=IDX_NAME, index_location=None,
77 repo_location=None, sa=None, repo_list=None):
77 repo_location=None, sa=None, repo_list=None):
78 self.indexname = indexname
78 self.indexname = indexname
79
79
@@ -95,7 +95,6 b' class WhooshIndexingDaemon(object):'
95
95
96 self.repo_paths = filtered_repo_paths
96 self.repo_paths = filtered_repo_paths
97
97
98
99 self.initial = False
98 self.initial = False
100 if not os.path.isdir(self.index_location):
99 if not os.path.isdir(self.index_location):
101 os.makedirs(self.index_location)
100 os.makedirs(self.index_location)
@@ -155,7 +154,6 b' class WhooshIndexingDaemon(object):'
155 modtime=self.get_node_mtime(node),
154 modtime=self.get_node_mtime(node),
156 extension=node.extension)
155 extension=node.extension)
157
156
158
159 def build_index(self):
157 def build_index(self):
160 if os.path.exists(self.index_location):
158 if os.path.exists(self.index_location):
161 log.debug('removing previous index')
159 log.debug('removing previous index')
@@ -177,7 +175,6 b' class WhooshIndexingDaemon(object):'
177 writer.commit(merge=True)
175 writer.commit(merge=True)
178 log.debug('>>> FINISHED BUILDING INDEX <<<')
176 log.debug('>>> FINISHED BUILDING INDEX <<<')
179
177
180
181 def update_index(self):
178 def update_index(self):
182 log.debug('STARTING INCREMENTAL INDEXING UPDATE')
179 log.debug('STARTING INCREMENTAL INDEXING UPDATE')
183
180
@@ -65,10 +65,20 b' div.codeblock .code-body table td {'
65 div.code-body {
65 div.code-body {
66 background-color: #FFFFFF;
66 background-color: #FFFFFF;
67 }
67 }
68 div.code-body pre .match{
68
69 div.codeblock .code-header .search-path {
70 padding: 0px 0px 0px 10px;
71 }
72
73 div.search-code-body {
74 background-color: #FFFFFF;
75 padding: 5px 0px 5px 10px;
76 }
77
78 div.search-code-body pre .match{
69 background-color: #FAFFA6;
79 background-color: #FAFFA6;
70 }
80 }
71 div.code-body pre .break{
81 div.search-code-body pre .break{
72 background-color: #DDE7EF;
82 background-color: #DDE7EF;
73 width: 100%;
83 width: 100%;
74 color: #747474;
84 color: #747474;
@@ -5,10 +5,11 b''
5 <div class="table">
5 <div class="table">
6 <div id="body${cnt}" class="codeblock">
6 <div id="body${cnt}" class="codeblock">
7 <div class="code-header">
7 <div class="code-header">
8 <div class="revision">${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
8 <div class="search-path">${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
9 h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}</div>
9 h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}
10 </div>
10 </div>
11 </div>
11 <div class="code-body">
12 <div class="search-code-body">
12 <pre>${h.literal(sr['content_short_hl'])}</pre>
13 <pre>${h.literal(sr['content_short_hl'])}</pre>
13 </div>
14 </div>
14 </div>
15 </div>
General Comments 0
You need to be logged in to leave comments. Login now