##// END OF EJS Templates
search: fix bug where file path link was wrong when the repository...
dan -
r797:ff35bfc3 default
parent child Browse files
Show More
@@ -1,279 +1,279 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2012-2016 RhodeCode GmbH
3 # Copyright (C) 2012-2016 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 Index schema for RhodeCode
22 Index schema for RhodeCode
23 """
23 """
24
24
25 from __future__ import absolute_import
25 from __future__ import absolute_import
26 import logging
26 import logging
27 import os
27 import os
28 import re
28 import re
29
29
30 from pylons.i18n.translation import _
30 from pylons.i18n.translation import _
31
31
32 from whoosh import query as query_lib, sorting
32 from whoosh import query as query_lib, sorting
33 from whoosh.highlight import HtmlFormatter, ContextFragmenter
33 from whoosh.highlight import HtmlFormatter, ContextFragmenter
34 from whoosh.index import create_in, open_dir, exists_in, EmptyIndexError
34 from whoosh.index import create_in, open_dir, exists_in, EmptyIndexError
35 from whoosh.qparser import QueryParser, QueryParserError
35 from whoosh.qparser import QueryParser, QueryParserError
36
36
37 import rhodecode.lib.helpers as h
37 import rhodecode.lib.helpers as h
38 from rhodecode.lib.index import BaseSearch
38 from rhodecode.lib.index import BaseSearch
39
39
40 log = logging.getLogger(__name__)
40 log = logging.getLogger(__name__)
41
41
42
42
43 try:
43 try:
44 # we first try to import from rhodecode tools, fallback to copies if
44 # we first try to import from rhodecode tools, fallback to copies if
45 # we're unable to
45 # we're unable to
46 from rhodecode_tools.lib.fts_index.whoosh_schema import (
46 from rhodecode_tools.lib.fts_index.whoosh_schema import (
47 ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
47 ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
48 COMMIT_SCHEMA)
48 COMMIT_SCHEMA)
49 except ImportError:
49 except ImportError:
50 log.warning('rhodecode_tools schema not available, doing a fallback '
50 log.warning('rhodecode_tools schema not available, doing a fallback '
51 'import from `rhodecode.lib.index.whoosh_fallback_schema`')
51 'import from `rhodecode.lib.index.whoosh_fallback_schema`')
52 from rhodecode.lib.index.whoosh_fallback_schema import (
52 from rhodecode.lib.index.whoosh_fallback_schema import (
53 ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
53 ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
54 COMMIT_SCHEMA)
54 COMMIT_SCHEMA)
55
55
56
56
57 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
57 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
58 FRAGMENTER = ContextFragmenter(200)
58 FRAGMENTER = ContextFragmenter(200)
59
59
60 log = logging.getLogger(__name__)
60 log = logging.getLogger(__name__)
61
61
62
62
63
63
class Search(BaseSearch):
    """
    Whoosh-backed full-text search over the file-content and commit
    indexes produced by the RhodeCode whoosh indexer.
    """

    name = 'whoosh'

    def __init__(self, config):
        """
        :param config: dict; must contain 'location', the directory that
            holds (or will hold) the whoosh index files.
        """
        self.config = config
        if not os.path.isdir(self.config['location']):
            os.makedirs(self.config['location'])

        # Open the index if it already exists on disk, otherwise create it.
        opener = create_in
        if exists_in(self.config['location'], indexname=FILE_INDEX_NAME):
            opener = open_dir
        file_index = opener(self.config['location'], schema=FILE_SCHEMA,
                            indexname=FILE_INDEX_NAME)

        opener = create_in
        if exists_in(self.config['location'], indexname=COMMIT_INDEX_NAME):
            opener = open_dir
        changeset_index = opener(self.config['location'], schema=COMMIT_SCHEMA,
                                 indexname=COMMIT_INDEX_NAME)

        self.commit_schema = COMMIT_SCHEMA
        self.commit_index = changeset_index
        self.file_schema = FILE_SCHEMA
        self.file_index = file_index
        # lazily opened by _init_searcher(), closed by cleanup()
        self.searcher = None

    def cleanup(self):
        """Close the searcher opened by :meth:`_init_searcher`, if any."""
        if self.searcher:
            self.searcher.close()

    def _extend_query(self, query):
        """
        If *query* contains anything that looks like an (abbreviated)
        commit hash, OR-extend the query with ``commit_id`` prefix matches
        so that hash fragments find their commits.
        """
        hashes = re.findall('([0-9a-f]{5,40})', query)
        if hashes:
            # NOTE: named `commit_hash`, not `h`, to avoid shadowing the
            # module-level helpers alias `h`.
            hashes_or_query = ' OR '.join(
                'commit_id:%s*' % commit_hash for commit_hash in hashes)
            query = u'(%s) OR %s' % (query, hashes_or_query)
        return query

    def search(self, query, document_type, search_user, repo_name=None,
               requested_page=1, page_limit=10, sort=None):
        """
        Run *query* against the index selected by *document_type* and
        return a result dict with keys: results, count, error, runtime.

        :param search_user: auth user; used to filter repos by permission
        :param repo_name: optional single-repository scope
        :param sort: 'oldfirst'/'newfirst' date ordering (commit search only)
        """
        query = self._extend_query(query)

        log.debug(u'QUERY: %s on %s', query, document_type)
        result = {
            'results': [],
            'count': 0,
            'error': None,
            'runtime': 0
        }
        search_type, index_name, schema_defn = self._prepare_for_search(
            document_type)
        self._init_searcher(index_name)
        try:
            qp = QueryParser(search_type, schema=schema_defn)
            allowed_repos_filter = self._get_repo_filter(
                search_user, repo_name)
            try:
                query = qp.parse(unicode(query))
                log.debug('query: %s (%s)' % (query, repr(query)))

                reverse, sortedby = False, None
                if search_type == 'message':
                    if sort == 'oldfirst':
                        sortedby = 'date'
                        reverse = False
                    elif sort == 'newfirst':
                        sortedby = 'date'
                        reverse = True

                whoosh_results = self.searcher.search(
                    query, filter=allowed_repos_filter, limit=None,
                    sortedby=sortedby, reverse=reverse)

                # fixes for 32k limit that whoosh uses for highlight
                whoosh_results.fragmenter.charlimit = None
                res_ln = whoosh_results.scored_length()
                result['runtime'] = whoosh_results.runtime
                result['count'] = res_ln
                result['results'] = WhooshResultWrapper(
                    search_type, res_ln, whoosh_results)

            except QueryParserError:
                result['error'] = _('Invalid search query. Try quoting it.')
        except (EmptyIndexError, IOError, OSError):
            msg = _('There is no index to search in. '
                    'Please run whoosh indexer')
            log.exception(msg)
            result['error'] = msg
        except Exception:
            msg = _('An error occurred during this search operation')
            log.exception(msg)
            result['error'] = msg

        return result

    def statistics(self):
        """Return a list of {'key': label, 'value': text} stats rows."""
        stats = [
            {'key': _('Index Type'), 'value': 'Whoosh'},
            {'key': _('File Index'), 'value': str(self.file_index)},
            # str() for consistency with the commit-index row below
            {'key': _('Indexed documents'),
             'value': str(self.file_index.doc_count())},
            {'key': _('Last update'),
             'value': h.time_to_datetime(self.file_index.last_modified())},
            {'key': _('Commit index'), 'value': str(self.commit_index)},
            {'key': _('Indexed documents'),
             'value': str(self.commit_index.doc_count())},
            {'key': _('Last update'),
             'value': h.time_to_datetime(self.commit_index.last_modified())}
        ]
        return stats

    def _get_repo_filter(self, auth_user, repo_name):
        """
        Build a whoosh filter restricting results to repositories the
        user may read. Returns None (no filtering) for global admins.
        """
        allowed_to_search = [
            repo for repo, perm in
            auth_user.permissions['repositories'].items()
            if perm != 'repository.none']

        if repo_name:
            repo_filter = [query_lib.Term('repository', repo_name)]

        elif 'hg.admin' in auth_user.permissions.get('global', []):
            return None

        else:
            repo_filter = [query_lib.Term('repository', _rn)
                           for _rn in allowed_to_search]
            # in case we're not allowed to search anywhere, it's a trick
            # to tell whoosh we're filtering, on ALL results
            repo_filter = repo_filter or [query_lib.Term('repository', '')]

        return query_lib.Or(repo_filter)

    def _prepare_for_search(self, cur_type):
        """
        Map a document type ('content'/'commit'/'path'/...) to the search
        field, index name and schema to use; unknown types fall back to
        file-content search.
        """
        search_type = {
            'content': 'content',
            'commit': 'message',
            'path': 'path',
            'repository': 'repository'
        }.get(cur_type, 'content')

        index_name = {
            'content': FILE_INDEX_NAME,
            'commit': COMMIT_INDEX_NAME,
            'path': FILE_INDEX_NAME
        }.get(cur_type, FILE_INDEX_NAME)

        schema_defn = {
            'content': self.file_schema,
            'commit': self.commit_schema,
            'path': self.file_schema
        }.get(cur_type, self.file_schema)

        log.debug('IDX: %s' % index_name)
        log.debug('SCHEMA: %s' % schema_defn)
        return search_type, index_name, schema_defn

    def _init_searcher(self, index_name):
        """Open *index_name* and store a fresh searcher on the instance."""
        idx = open_dir(self.config['location'], indexname=index_name)
        self.searcher = idx.searcher()
        return self.searcher
227
227
228
228
class WhooshResultWrapper(object):
    """
    Lazy wrapper over a whoosh result set. Iteration and slicing yield
    plain dicts enriched with highlight snippets and a repository-relative
    file path, depending on the search type.
    """

    def __init__(self, search_type, total_hits, results):
        self.search_type = search_type
        self.results = results
        self.total_hits = total_hits

    def __str__(self):
        return '<%s at %s>' % (type(self).__name__, len(self))

    __repr__ = __str__

    def __len__(self):
        return self.total_hits

    def __iter__(self):
        """
        Iterate over all hits, lazily producing enriched result dicts.
        """
        return (self.get_full_content(hit) for hit in self.results)

    def __getitem__(self, key):
        """
        Slice access; *key* is expected to be a slice object. Yields the
        enriched result dicts for the selected hits.
        """
        start, stop = key.start, key.stop
        return (self.get_full_content(hit)
                for hit in self.results[start:stop])

    def get_full_content(self, hit):
        # TODO: marcink: this feels like an overkill, there's a lot of data
        # inside hit object, and we don't need all
        res = dict(hit)

        if self.search_type in ('content', 'path'):
            # strip the leading repository name to get a repo-relative path
            rel = res['path'][len(res['repository']):]
            f_path = rel.lstrip(os.sep)
        else:
            f_path = ''  # noqa

        if self.search_type == 'content':
            res['content_short_hl'] = hit.highlights('content')
            res['f_path'] = f_path
        elif self.search_type == 'path':
            res['f_path'] = f_path
        elif self.search_type == 'message':
            res['message_hl'] = hit.highlights('message')

        return res
General Comments 0
You need to be logged in to leave comments. Login now