@@ -30,7 +30,7 @@ from pylons import request, config, tmpl
 
 from rhodecode.lib.auth import LoginRequired
 from rhodecode.lib.base import BaseController, render
-from rhodecode.lib.indexers import SCHEMA, IDX_NAME, WhooshResultWrapper
+from rhodecode.lib.indexers import CHGSETS_SCHEMA, SCHEMA, CHGSET_IDX_NAME, IDX_NAME, WhooshResultWrapper
 
 from webhelpers.paginate import Page
 from webhelpers.util import update_params
@@ -54,25 +54,41 @@ class SearchController(BaseController):
         c.formated_results = []
         c.runtime = ''
         c.cur_query = request.GET.get('q', None)
-        c.cur_type = request.GET.get('type', '…')
+        c.cur_type = request.GET.get('type', 'content')
         c.cur_search = search_type = {'content': 'content',
-                                      'commit': 'content',
+                                      'commit': 'message',
                                       'path': 'path',
                                       'repository': 'repository'}\
             .get(c.cur_type, 'content')
 
+        index_name = {
+            'content': IDX_NAME,
+            'commit': CHGSET_IDX_NAME,
+            'path': IDX_NAME}\
+            .get(c.cur_type, IDX_NAME)
+
+        schema_defn = {
+            'content': SCHEMA,
+            'commit': CHGSETS_SCHEMA,
+            'path': SCHEMA}\
+            .get(c.cur_type, SCHEMA)
+
+        log.debug('IDX: %s' % index_name)
+        log.debug('SCHEMA: %s' % schema_defn)
+
         if c.cur_query:
             cur_query = c.cur_query.lower()
+            log.debug(cur_query)
 
         if c.cur_query:
             p = int(request.params.get('page', 1))
             highlight_items = set()
             try:
                 idx = open_dir(config['app_conf']['index_dir'],
-                               indexname=IDX_NAME)
+                               indexname=index_name)
                 searcher = idx.searcher()
 
-                qp = QueryParser(search_type, schema=SCHEMA)
+                qp = QueryParser(search_type, schema=schema_defn)
                 if c.repo_name:
                     cur_query = u'repository:%s %s' % (c.repo_name, cur_query)
                 try:
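Note on the hunk above: the requested search type now selects all three of the default query field (search_type), the Whoosh index name, and the schema. The sketch below uses a hypothetical two-field schema, not the real SCHEMA/CHGSETS_SCHEMA, to show why the default field matters: bare terms fall into that field, while the repository: prefix the controller prepends keeps its own field.

    from whoosh.fields import Schema, ID, TEXT
    from whoosh.qparser import QueryParser

    # hypothetical stand-in schema, not RhodeCode's
    schema = Schema(repository=ID(stored=True), message=TEXT(stored=True))

    # 'message' plays the role of search_type when type=commit
    qp = QueryParser('message', schema=schema)
    print qp.parse(u'repository:repo1 fix bug')
    # roughly: And([Term('repository', u'repo1'),
    #               Term('message', u'fix'), Term('message', u'bug')])
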
@@ -84,13 +100,13 @@ class SearchController(BaseController):
                         highlight_items.add(query.text)
                     else:
                         for i in query.all_terms():
-                            if i[0] == 'content':
+                            if i[0] in ['content', 'message']:
                                 highlight_items.add(i[1])
 
                     matcher = query.matcher(searcher)
 
-                    log.debug(query)
-                    log.debug(highlight_items)
+                    log.debug('query: %s' % query)
+                    log.debug('hl terms: %s' % highlight_items)
                     results = searcher.search(query)
                     res_ln = len(results)
                     c.runtime = '%s results (%.3f seconds)' % (
@@ -99,7 +115,7 @@ class SearchController(BaseController):
 
                     def url_generator(**kw):
                         return update_params("?q=%s&type=%s" \
-                            % (c.cur_query, c.cur_search), **kw)
+                            % (c.cur_query, c.cur_type), **kw)
                     repo_location = RepoModel().repos_path
                     c.formated_results = Page(
                         WhooshResultWrapper(search_type, searcher, matcher,
@@ -35,7 +35,7 @@ from string import strip
 from shutil import rmtree
 
 from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
-from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
+from whoosh.fields import TEXT, ID, STORED, NUMERIC, BOOLEAN, Schema, FieldType
 from whoosh.index import create_in, open_dir
 from whoosh.formats import Characters
 from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
@@ -51,10 +51,11 @@ from rhodecode.lib.utils2 import LazyPro
 from rhodecode.lib.utils import BasePasterCommand, Command, add_cache,\
     load_rcextensions
 
+log = logging.getLogger(__name__)
+
 # CUSTOM ANALYZER wordsplit + lowercase filter
 ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 
-
 #INDEX SCHEMA DEFINITION
 SCHEMA = Schema(
     fileid=ID(unique=True),
@@ -71,6 +72,22 @@ IDX_NAME = 'HG_INDEX'
 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
 FRAGMENTER = ContextFragmenter(200)
 
+CHGSETS_SCHEMA = Schema(
+    path=ID(unique=True, stored=True),
+    revision=NUMERIC(unique=True, stored=True),
+    last=BOOLEAN(),
+    owner=TEXT(),
+    repository=ID(unique=True, stored=True),
+    author=TEXT(stored=True),
+    message=FieldType(format=Characters(), analyzer=ANALYZER,
+                      scorable=True, stored=True),
+    parents=TEXT(),
+    added=TEXT(),
+    removed=TEXT(),
+    changed=TEXT(),
+)
+
+CHGSET_IDX_NAME = 'CHGSET_INDEX'
 
 class MakeIndex(BasePasterCommand):
 
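A runnable sketch of how this changeset schema is used, with an in-memory index, invented documents, and the field list trimmed to five: every changeset becomes one document, and the BOOLEAN last field is queryable as last:t, which is what the incremental updater and the new functional tests rely on to find the tip the index stopped at.

    from whoosh.fields import Schema, ID, TEXT, NUMERIC, BOOLEAN
    from whoosh.filedb.filestore import RamStorage
    from whoosh.qparser import QueryParser

    # trimmed-down stand-in for CHGSETS_SCHEMA
    schema = Schema(path=ID(unique=True, stored=True),
                    revision=NUMERIC(unique=True, stored=True),
                    last=BOOLEAN(),
                    repository=ID(unique=True, stored=True),
                    message=TEXT(stored=True))

    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    writer.add_document(path=u'aaa111', revision=0, last=False,
                        repository=u'repo1', message=u'initial commit')
    writer.add_document(path=u'bbb222', revision=1, last=True,
                        repository=u'repo1', message=u'fix whoosh indexer')
    writer.commit()

    with ix.searcher() as searcher:
        q = QueryParser('repository', schema=schema).parse(u'last:t AND repo1')
        for hit in searcher.search(q, sortedby='revision'):
            print hit['path'], hit['revision']  # -> bbb222 1
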
@@ -191,14 +208,20 @@ class WhooshResultWrapper(object):
 
     def get_full_content(self, docid):
         res = self.searcher.stored_fields(docid[0])
+        log.debug('result: %s' % res)
         full_repo_path = jn(self.repo_location, res['repository'])
         f_path = res['path'].split(full_repo_path)[-1]
         f_path = f_path.lstrip(os.sep)
+        res.update({'f_path': f_path})
 
-        content_short = self.get_short_content(res, docid[1])
-        res.update({'content_short': content_short,
-                    'content_short_hl': self.highlight(content_short),
-                    'f_path': f_path})
+        if self.search_type == 'content':
+            content_short = self.get_short_content(res, docid[1])
+            res.update({'content_short': content_short,
+                        'content_short_hl': self.highlight(content_short)})
+        elif self.search_type == 'message':
+            res.update({'message_hl': self.highlight(res['message'])})
+
+        log.debug('result: %s' % res)
 
         return res
 
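The message_hl/content_short_hl values above come from the module-level whoosh.highlight helpers already imported in this file. A minimal standalone sketch of that call, with invented text and terms:

    from whoosh.analysis import RegexTokenizer, LowercaseFilter
    from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter

    ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
    fragments = highlight(text=u'fixed the whoosh indexer for commit messages',
                          terms=frozenset([u'whoosh', u'commit']),
                          analyzer=ANALYZER,
                          fragmenter=ContextFragmenter(200),
                          formatter=HtmlFormatter('span',
                              between='\n<span class="break">...</span>\n'),
                          top=5)
    # matched terms come back wrapped in <span class="match ...">-style tags
    print fragments
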
@@ -216,19 +239,20 @@ class WhooshResultWrapper(object):
         :param size:
         """
         memory = [(0, 0)]
-        for span in self.matcher.spans():
-            start = span.startchar or 0
-            end = span.endchar or 0
-            start_offseted = max(0, start - self.fragment_size)
-            end_offseted = end + self.fragment_size
+        if self.matcher.supports('positions'):
+            for span in self.matcher.spans():
+                start = span.startchar or 0
+                end = span.endchar or 0
+                start_offseted = max(0, start - self.fragment_size)
+                end_offseted = end + self.fragment_size
 
-            if start_offseted < memory[-1][1]:
-                start_offseted = memory[-1][1]
-            memory.append((start_offseted, end_offseted,))
-            yield (start_offseted, end_offseted,)
+                if start_offseted < memory[-1][1]:
+                    start_offseted = memory[-1][1]
+                memory.append((start_offseted, end_offseted,))
+                yield (start_offseted, end_offseted,)
 
     def highlight(self, content, top=5):
-        if self.search_type != 'content':
+        if self.search_type not in ['content', 'message']:
             return ''
         hl = highlight(
             text=content,
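The generator above widens each matched span by fragment_size on both sides and clamps the start against the previous fragment's end so fragments never overlap. A pure-Python sketch of the same idea, with made-up span data:

    def offset_spans(spans, fragment_size):
        # widen each (start, end) match by fragment_size on both sides,
        # clamping the start so consecutive fragments never overlap
        last_end = 0
        for start, end in spans:
            start_offseted = max(0, start - fragment_size)
            if start_offseted < last_end:
                start_offseted = last_end
            end_offseted = end + fragment_size
            last_end = end_offseted
            yield (start_offseted, end_offseted)

    print list(offset_spans([(10, 14), (18, 22), (90, 94)], 20))
    # -> [(0, 34), (34, 42), (70, 114)]
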
@@ -41,12 +41,14 @@ sys.path.append(project_path)
 from rhodecode.config.conf import INDEX_EXTENSIONS
 from rhodecode.model.scm import ScmModel
 from rhodecode.lib.utils2 import safe_unicode
-from rhodecode.lib.indexers import SCHEMA, IDX_NAME
+from rhodecode.lib.indexers import SCHEMA, IDX_NAME, CHGSETS_SCHEMA, CHGSET_IDX_NAME
 
 from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \
     NodeDoesNotExistError
 
-from whoosh.index import create_in, open_dir
+from whoosh.index import create_in, open_dir, exists_in
+from whoosh.query import *
+from whoosh.qparser import QueryParser
 
 log = logging.getLogger('whoosh_indexer')
 
54 | |||
@@ -89,12 +91,19 b' class WhooshIndexingDaemon(object):' | |||||
89 | self.filtered_repo_update_paths[repo_name] = repo |
|
91 | self.filtered_repo_update_paths[repo_name] = repo | |
90 | self.repo_paths = self.filtered_repo_update_paths |
|
92 | self.repo_paths = self.filtered_repo_update_paths | |
91 |
|
93 | |||
92 |
self.initial = |
|
94 | self.initial = True | |
93 | if not os.path.isdir(self.index_location): |
|
95 | if not os.path.isdir(self.index_location): | |
94 | os.makedirs(self.index_location) |
|
96 | os.makedirs(self.index_location) | |
95 | log.info('Cannot run incremental index since it does not' |
|
97 | log.info('Cannot run incremental index since it does not' | |
96 | ' yet exist running full build') |
|
98 | ' yet exist running full build') | |
97 | self.initial = True |
|
99 | elif not exists_in(self.index_location, IDX_NAME): | |
|
100 | log.info('Running full index build as the file content' | |||
|
101 | ' index does not exist') | |||
|
102 | elif not exists_in(self.index_location, CHGSET_IDX_NAME): | |||
|
103 | log.info('Running full index build as the changeset' | |||
|
104 | ' index does not exist') | |||
|
105 | else: | |||
|
106 | self.initial = False | |||
98 |
|
107 | |||
99 | def get_paths(self, repo): |
|
108 | def get_paths(self, repo): | |
100 | """ |
|
109 | """ | |
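exists_in is the right guard here because one index directory now holds two named indexes, so each name has to be checked separately. A condensed sketch of the decision above, as a standalone function with the index names hardcoded:

    import os.path
    from whoosh.index import exists_in

    def needs_full_build(index_location):
        # a missing directory or either missing named index forces a rebuild
        if not os.path.isdir(index_location):
            return True
        return not (exists_in(index_location, indexname='HG_INDEX') and
                    exists_in(index_location, indexname='CHGSET_INDEX'))
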
@@ -158,35 +167,86 @@ class WhooshIndexingDaemon(object):
             )
         return indexed, indexed_w_content
 
-    def build_index(self):
-        if os.path.exists(self.index_location):
-            log.debug('removing previous index')
-            rmtree(self.index_location)
-
-        if not os.path.exists(self.index_location):
-            os.mkdir(self.index_location)
-
-        idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
-        writer = idx.writer()
-        log.debug('BUILDING INDEX FOR EXTENSIONS %s '
-                  'AND REPOS %s' % (INDEX_EXTENSIONS, self.repo_paths.keys()))
-
-        for repo_name, repo in self.repo_paths.items():
-            log.debug('building index @ %s' % repo.path)
-            i_cnt = iwc_cnt = 0
-            for idx_path in self.get_paths(repo):
-                i, iwc = self.add_doc(writer, idx_path, repo, repo_name)
-                i_cnt += i
-                iwc_cnt += iwc
-            log.debug('added %s files %s with content for repo %s' % (
-                i_cnt + iwc_cnt, iwc_cnt, repo.path)
-            )
-
-        log.debug('>> COMMITING CHANGES <<')
-        writer.commit(merge=True)
-        log.debug('>>> FINISHED BUILDING INDEX <<<')
-
-    def update_index(self):
+    def index_changesets(self, writer, repo_name, repo, start_rev=0):
+        """
+        Add all changesets in the vcs repo starting at start_rev
+        to the index writer
+        """
+
+        log.debug('indexing changesets in %s[%d:]' % (repo_name, start_rev))
+
+        indexed = 0
+        for cs in repo[start_rev:]:
+            writer.add_document(
+                path=unicode(cs.raw_id),
+                owner=unicode(repo.contact),
+                repository=safe_unicode(repo_name),
+                author=cs.author,
+                message=cs.message,
+                revision=cs.revision,
+                last=cs.last,
+                added=u' '.join([node.path for node in cs.added]).lower(),
+                removed=u' '.join([node.path for node in cs.removed]).lower(),
+                changed=u' '.join([node.path for node in cs.changed]).lower(),
+                parents=u' '.join([cs.raw_id for cs in cs.parents]),
+            )
+            indexed += 1
+
+        log.debug('indexed %d changesets for repo %s' % (indexed, repo_name))
+
+    def index_files(self, file_idx_writer, repo_name, repo):
+        i_cnt = iwc_cnt = 0
+        log.debug('building index for [%s]' % repo.path)
+        for idx_path in self.get_paths(repo):
+            i, iwc = self.add_doc(file_idx_writer, idx_path, repo, repo_name)
+            i_cnt += i
+            iwc_cnt += iwc
+
+        log.debug('added %s files %s with content for repo %s' % (i_cnt + iwc_cnt, iwc_cnt, repo.path))
+
+    def update_changeset_index(self):
+        idx = open_dir(self.index_location, indexname=CHGSET_IDX_NAME)
+
+        with idx.searcher() as searcher:
+            writer = idx.writer()
+            writer_is_dirty = False
+            try:
+                for repo_name, repo in self.repo_paths.items():
+                    # skip indexing if there aren't any revs in the repo
+                    revs = repo.revisions
+                    if len(revs) < 1:
+                        continue
+
+                    qp = QueryParser('repository', schema=CHGSETS_SCHEMA)
+                    q = qp.parse(u"last:t AND %s" % repo_name)
+
+                    results = searcher.search(q, sortedby='revision')
+
+                    last_rev = 0
+                    if len(results) > 0:
+                        last_rev = results[0]['revision']
+
+                    # there are new changesets to index or a new repo to index
+                    if last_rev == 0 or len(revs) > last_rev + 1:
+                        # delete the docs in the index for the previous last changeset(s)
+                        for hit in results:
+                            q = qp.parse(u"last:t AND %s AND path:%s" %
+                                         (repo_name, hit['path']))
+                            writer.delete_by_query(q)
+
+                        # index from the previous last changeset + all new ones
+                        self.index_changesets(writer, repo_name, repo, last_rev)
+                        writer_is_dirty = True
+
+            finally:
+                if writer_is_dirty:
+                    log.debug('>> COMMITING CHANGES TO CHANGESET INDEX<<')
+                    writer.commit(merge=True)
+                    log.debug('>> COMMITTED CHANGES TO CHANGESET INDEX<<')
+                else:
+                    writer.cancel()
+
+    def update_file_index(self):
         log.debug((u'STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s '
                    'AND REPOS %s') % (INDEX_EXTENSIONS, self.repo_paths.keys()))
 
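On the len(revs) > last_rev + 1 test above: revisions are 0-based, so an indexed tip of last_rev means last_rev + 1 changesets are already covered; the old tip is re-indexed too so its last flag can move to the new tip. A toy illustration with invented numbers:

    # pretend repo with revisions 0..4 whose index stopped at revision 2
    revs = range(5)
    last_rev = 2
    if last_rev == 0 or len(revs) > last_rev + 1:
        # re-index the old tip (its `last` flag flips) plus everything newer
        to_reindex = revs[last_rev:]
        print to_reindex  # -> [2, 3, 4]
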
@@ -196,72 +256,117 @@ class WhooshIndexingDaemon(object):
         # The set of all paths we need to re-index
         to_index = set()
 
-        reader = idx.reader()
-        writer = idx.writer()
-
-        # Loop over the stored fields in the index
-        for fields in reader.all_stored_fields():
-            indexed_path = fields['path']
-            indexed_repo_path = fields['repository']
-            indexed_paths.add(indexed_path)
-
-            if not indexed_repo_path in self.filtered_repo_update_paths:
-                continue
-
-            repo = self.repo_paths[indexed_repo_path]
-
-            try:
-                node = self.get_node(repo, indexed_path)
-                # Check if this file was changed since it was indexed
-                indexed_time = fields['modtime']
-                mtime = self.get_node_mtime(node)
-                if mtime > indexed_time:
-                    # The file has changed, delete it and add it to the list of
-                    # files to reindex
-                    log.debug('adding to reindex list %s mtime: %s vs %s' % (
-                        indexed_path, mtime, indexed_time)
-                    )
-                    writer.delete_by_term('fileid', indexed_path)
-
-                    to_index.add(indexed_path)
-            except (ChangesetError, NodeDoesNotExistError):
-                # This file was deleted since it was indexed
-                log.debug('removing from index %s' % indexed_path)
-                writer.delete_by_term('path', indexed_path)
-
-        # Loop over the files in the filesystem
-        # Assume we have a function that gathers the filenames of the
-        # documents to be indexed
-        ri_cnt_total = 0  # indexed
-        riwc_cnt_total = 0  # indexed with content
-        for repo_name, repo in self.repo_paths.items():
-            ri_cnt = 0   # indexed
-            riwc_cnt = 0  # indexed with content
-            for path in self.get_paths(repo):
-                path = safe_unicode(path)
-                if path in to_index or path not in indexed_paths:
-
-                    # This is either a file that's changed, or a new file
-                    # that wasn't indexed before. So index it!
-                    i, iwc = self.add_doc(writer, path, repo, repo_name)
-                    log.debug('re indexing %s' % path)
-                    ri_cnt += i
-                    ri_cnt_total += 1
-                    riwc_cnt += iwc
-                    riwc_cnt_total += iwc
-            log.debug('added %s files %s with content for repo %s' % (
-                ri_cnt + riwc_cnt, riwc_cnt, repo.path)
-            )
-        log.debug('indexed %s files in total and %s with content' % (
-            ri_cnt_total, riwc_cnt_total)
-        )
-        log.debug('>> COMMITING CHANGES <<')
-        writer.commit(merge=True)
-        log.debug('>>> FINISHED REBUILDING INDEX <<<')
+        writer = idx.writer()
+        writer_is_dirty = False
+        try:
+            with idx.reader() as reader:
+
+                # Loop over the stored fields in the index
+                for fields in reader.all_stored_fields():
+                    indexed_path = fields['path']
+                    indexed_repo_path = fields['repository']
+                    indexed_paths.add(indexed_path)
+
+                    if not indexed_repo_path in self.filtered_repo_update_paths:
+                        continue
+
+                    repo = self.repo_paths[indexed_repo_path]
+
+                    try:
+                        node = self.get_node(repo, indexed_path)
+                        # Check if this file was changed since it was indexed
+                        indexed_time = fields['modtime']
+                        mtime = self.get_node_mtime(node)
+                        if mtime > indexed_time:
+                            # The file has changed, delete it and add it to the list of
+                            # files to reindex
+                            log.debug('adding to reindex list %s mtime: %s vs %s' % (
+                                indexed_path, mtime, indexed_time)
+                            )
+                            writer.delete_by_term('fileid', indexed_path)
+                            writer_is_dirty = True
+
+                            to_index.add(indexed_path)
+                    except (ChangesetError, NodeDoesNotExistError):
+                        # This file was deleted since it was indexed
+                        log.debug('removing from index %s' % indexed_path)
+                        writer.delete_by_term('path', indexed_path)
+                        writer_is_dirty = True
+
+            # Loop over the files in the filesystem
+            # Assume we have a function that gathers the filenames of the
+            # documents to be indexed
+            ri_cnt_total = 0  # indexed
+            riwc_cnt_total = 0  # indexed with content
+            for repo_name, repo in self.repo_paths.items():
+                # skip indexing if there aren't any revisions
+                if len(repo) < 1:
+                    continue
+                ri_cnt = 0   # indexed
+                riwc_cnt = 0  # indexed with content
+                for path in self.get_paths(repo):
+                    path = safe_unicode(path)
+                    if path in to_index or path not in indexed_paths:
+
+                        # This is either a file that's changed, or a new file
+                        # that wasn't indexed before. So index it!
+                        i, iwc = self.add_doc(writer, path, repo, repo_name)
+                        writer_is_dirty = True
+                        log.debug('re indexing %s' % path)
+                        ri_cnt += i
+                        ri_cnt_total += 1
+                        riwc_cnt += iwc
+                        riwc_cnt_total += iwc
+                log.debug('added %s files %s with content for repo %s' % (
+                    ri_cnt + riwc_cnt, riwc_cnt, repo.path)
+                )
+            log.debug('indexed %s files in total and %s with content' % (
+                ri_cnt_total, riwc_cnt_total)
+            )
+        finally:
+            if writer_is_dirty:
+                log.debug('>> COMMITING CHANGES <<')
+                writer.commit(merge=True)
+                log.debug('>>> FINISHED REBUILDING INDEX <<<')
+            else:
+                writer.cancel()
+
+    def build_indexes(self):
+        if os.path.exists(self.index_location):
+            log.debug('removing previous index')
+            rmtree(self.index_location)
+
+        if not os.path.exists(self.index_location):
+            os.mkdir(self.index_location)
+
+        chgset_idx = create_in(self.index_location, CHGSETS_SCHEMA, indexname=CHGSET_IDX_NAME)
+        chgset_idx_writer = chgset_idx.writer()
+
+        file_idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)
+        file_idx_writer = file_idx.writer()
+        log.debug('BUILDING INDEX FOR EXTENSIONS %s '
+                  'AND REPOS %s' % (INDEX_EXTENSIONS, self.repo_paths.keys()))
+
+        for repo_name, repo in self.repo_paths.items():
+            # skip indexing if there aren't any revisions
+            if len(repo) < 1:
+                continue
+
+            self.index_files(file_idx_writer, repo_name, repo)
+            self.index_changesets(chgset_idx_writer, repo_name, repo)
+
+        log.debug('>> COMMITING CHANGES <<')
+        file_idx_writer.commit(merge=True)
+        chgset_idx_writer.commit(merge=True)
+        log.debug('>>> FINISHED BUILDING INDEX <<<')
+
+    def update_indexes(self):
+        self.update_file_index()
+        self.update_changeset_index()
 
     def run(self, full_index=False):
         """Run daemon"""
         if full_index or self.initial:
-            self.build_index()
+            self.build_indexes()
         else:
-            self.update_index()
+            self.update_indexes()
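Both update paths above track a writer_is_dirty flag so an untouched writer is cancelled rather than committed, avoiding a needless commit/merge cycle while still releasing the writer lock. The pattern in isolation, on a throwaway in-memory index:

    from whoosh.fields import Schema, ID
    from whoosh.filedb.filestore import RamStorage

    ix = RamStorage().create_index(Schema(path=ID(stored=True)))
    writer = ix.writer()
    writer_is_dirty = False  # flipped only when something is actually written
    try:
        pass  # no documents changed this round
    finally:
        if writer_is_dirty:
            writer.commit(merge=True)
        else:
            writer.cancel()  # release the writer lock without a commit cycle
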
@@ -61,7 +61,7 @@
 </div>
 <div class="select">
     ${h.select('type',c.cur_type,[('content',_('File contents')),
-                                  ##('commit',_('Commit messages')),
+                                  ('commit',_('Commit messages')),
                                   ('path',_('File names')),
                                   ##('repository',_('Repository names')),
                                   ])}
@@ -72,13 +72,13 @@
 </div>
 ${h.end_form()}
 <div class="search">
-    %if c.cur_search == 'content':
+    %if c.cur_type == 'content':
         <%include file='search_content.html'/>
-    %elif c.cur_search == 'path':
+    %elif c.cur_type == 'path':
         <%include file='search_path.html'/>
-    %elif c.cur_search == 'commit':
+    %elif c.cur_type == 'commit':
         <%include file='search_commit.html'/>
-    %elif c.cur_search == 'repository':
+    %elif c.cur_type == 'repository':
         <%include file='search_repository.html'/>
     %endif
 </div>
@@ -0,0 +1,44 @@
+## commit highlighting
+
+%for cnt,sr in enumerate(c.formated_results):
+    %if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(sr['repository'],'search results check'):
+    <div class="table">
+        <div id="body${cnt}" class="codeblock">
+            <div class="code-header">
+                <div class="search-path">${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
+                    h.url('changeset_home',repo_name=sr['repository'],revision=sr['path']))}
+                </div>
+            </div>
+            <div class="left">
+                <div class="author">
+                    <div class="gravatar">
+                        <img alt="gravatar" src="${h.gravatar_url(h.email(sr['author']),20)}"/>
+                    </div>
+                    <span>${h.person(sr['author'])}</span><br/>
+                    <span><a href="mailto:${h.email_or_none(sr['author'])}">${h.email_or_none(sr['author'])}</a></span><br/>
+                </div>
+                %if sr['message_hl']:
+                <div class="search-code-body">
+                    <pre>${h.literal(sr['message_hl'])}</pre>
+                </div>
+                %else:
+                <div class="message">${h.urlify_commit(sr['message'], sr['repository'])}</div>
+                %endif
+            </div>
+        </div>
+    </div>
+    %else:
+        %if cnt == 0:
+        <div class="table">
+            <div id="body${cnt}" class="codeblock">
+                <div class="error">${_('Permission denied')}</div>
+            </div>
+        </div>
+        %endif
+    %endif
+%endfor
+%if c.cur_query and c.formated_results:
+    <div class="pagination-wh pagination-left">
+    ${c.formated_results.pager('$link_previous ~2~ $link_next')}
+    </div>
+%endif
@@ -27,7 +27,7 @@ class TestSearchController(TestControlle
         self.log_user()
         response = self.app.get(url(controller='search', action='index'),
                                 {'q': 'def repo'})
-        response.mustcontain('…')
+        response.mustcontain('10 results')
 
     def test_repo_search(self):
         self.log_user()
@@ -35,3 +35,44 @@ class TestSearchController(TestControlle
                                 {'q': 'repository:%s def test' % HG_REPO})
 
         response.mustcontain('4 results')
+
+    def test_search_last(self):
+        self.log_user()
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q': 'last:t', 'type': 'commit'})
+
+        response.mustcontain('1 results')
+
+    def test_search_commit_message(self):
+        self.log_user()
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q': 'bother to ask where to fetch repo during tests',
+                                 'type': 'commit'})
+
+        response.mustcontain('1 results')
+        response.mustcontain('a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1')
+
+    def test_search_commit_changed_file(self):
+        self.log_user()
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q': 'changed:tests/utils.py',
+                                 'type': 'commit'})
+
+        response.mustcontain('a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1')
+
+    def test_search_commit_added_file(self):
+        self.log_user()
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q': 'added:README.rst',
+                                 'type': 'commit'})
+
+        response.mustcontain('1 results')
+        response.mustcontain('3803844fdbd3b711175fc3da9bdacfcd6d29a6fb')
+
+    def test_search_author(self):
+        self.log_user()
+        response = self.app.get(url(controller='search', action='index'),
+                                {'q': 'author:marcin@python-blog.com revision:0',
+                                 'type': 'commit'})
+
+        response.mustcontain('1 results')