##// END OF EJS Templates
search: fixed search tests
super-admin -
r5113:9cd499cc default
parent child Browse files
Show More
@@ -1,201 +1,208 b''
1 # Copyright (C) 2010-2023 RhodeCode GmbH
1 # Copyright (C) 2010-2023 RhodeCode GmbH
2 #
2 #
3 # This program is free software: you can redistribute it and/or modify
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License, version 3
4 # it under the terms of the GNU Affero General Public License, version 3
5 # (only), as published by the Free Software Foundation.
5 # (only), as published by the Free Software Foundation.
6 #
6 #
7 # This program is distributed in the hope that it will be useful,
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
10 # GNU General Public License for more details.
11 #
11 #
12 # You should have received a copy of the GNU Affero General Public License
12 # You should have received a copy of the GNU Affero General Public License
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 #
14 #
15 # This program is dual-licensed. If you wish to learn more about the
15 # This program is dual-licensed. If you wish to learn more about the
16 # RhodeCode Enterprise Edition, including its added features, Support services,
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
18
19 import os
19 import os
20
20
21 import mock
21 import mock
22 import pytest
22 import pytest
23 from whoosh import query
23 from whoosh import query
24
24
25 from rhodecode.tests import (
25 from rhodecode.tests import (
26 TestController, route_path_generator, HG_REPO,
26 TestController, route_path_generator, HG_REPO,
27 TEST_USER_REGULAR_LOGIN, TEST_USER_REGULAR_PASS)
27 TEST_USER_REGULAR_LOGIN, TEST_USER_REGULAR_PASS)
28 from rhodecode.tests.utils import AssertResponse
28 from rhodecode.tests.utils import AssertResponse
29
29
30
30
def route_path(name, params=None, **kwargs):
    """Build a URL for the named search route used by these tests.

    Delegates to ``route_path_generator``; ``kwargs`` fill URL
    placeholders such as ``repo_name``, ``params`` become the query
    string.
    """
    from rhodecode.apps._base import ADMIN_PREFIX
    route_map = {
        'search': ADMIN_PREFIX + '/search',
        'search_repo': '/{repo_name}/search',
    }
    return route_path_generator(route_map, name=name, params=params, **kwargs)
40
40
41
41
42 class TestSearchController(TestController):
42 class TestSearchController(TestController):
43
43
44 def test_index(self):
44 def test_index(self):
45 self.log_user()
45 self.log_user()
46 response = self.app.get(route_path('search'))
46 response = self.app.get(route_path('search'))
47 assert_response = response.assert_response()
47 assert_response = response.assert_response()
48 assert_response.one_element_exists('input#q')
48 assert_response.one_element_exists('input#q')
49
49
50 def test_search_files_empty_search(self):
50 def test_search_files_empty_search(self):
51 if os.path.isdir(self.index_location):
51 if os.path.isdir(self.index_location):
52 pytest.skip('skipped due to existing index')
52 pytest.skip('skipped due to existing index')
53 else:
53 else:
54 self.log_user()
54 self.log_user()
55 response = self.app.get(route_path('search'),
55 response = self.app.get(route_path('search'),
56 {'q': HG_REPO})
56 {'q': HG_REPO})
57 response.mustcontain('There is no index to search in. '
57 response.mustcontain('There is no index to search in. '
58 'Please run whoosh indexer')
58 'Please run whoosh indexer')
59
59
60 def test_search_validation(self):
60 def test_search_validation(self):
61 self.log_user()
61 self.log_user()
62 response = self.app.get(route_path('search'),
62 response = self.app.get(route_path('search'),
63 {'q': query, 'type': 'content', 'page_limit': 1000})
63 {'q': query, 'type': 'content', 'page_limit': 1000})
64
64
65 response.mustcontain(
65 response.mustcontain(
66 'page_limit - 1000 is greater than maximum value 500')
66 'page_limit - 1000 is greater than maximum value 500')
67
67
68 @pytest.mark.parametrize("query, expected_hits, expected_paths", [
68 @pytest.mark.parametrize("query, expected_hits, expected_paths", [
69 ('todo', 23, [
69 ('todo', 23, [
70 'vcs/backends/hg/inmemory.py',
70 'vcs/backends/hg/inmemory.py',
71 'vcs/tests/test_git.py']),
71 'vcs/tests/test_git.py']),
72 ('extension:rst installation', 6, [
72 ('extension:rst installation', 6, [
73 'docs/index.rst',
73 'docs/index.rst',
74 'docs/installation.rst']),
74 'docs/installation.rst']),
75 ('def repo', 87, [
75 ('def repo', 87, [
76 'vcs/tests/test_git.py',
76 'vcs/tests/test_git.py',
77 'vcs/tests/test_changesets.py']),
77 'vcs/tests/test_changesets.py']),
78 ('repository:%s def test' % HG_REPO, 18, [
78 ('repository:%s def test' % HG_REPO, 18, [
79 'vcs/tests/test_git.py',
79 'vcs/tests/test_git.py',
80 'vcs/tests/test_changesets.py']),
80 'vcs/tests/test_changesets.py']),
81 ('"def main"', 9, [
81 ('"def main"', 9, [
82 'vcs/__init__.py',
82 'vcs/__init__.py',
83 'vcs/tests/__init__.py',
83 'vcs/tests/__init__.py',
84 'vcs/utils/progressbar.py']),
84 'vcs/utils/progressbar.py']),
85 ('owner:test_admin', 358, [
85 ('owner:test_admin', 358, [
86 'vcs/tests/base.py',
86 'vcs/tests/base.py',
87 'MANIFEST.in',
87 'MANIFEST.in',
88 'vcs/utils/termcolors.py',
88 'vcs/utils/termcolors.py',
89 'docs/theme/ADC/static/documentation.png']),
89 'docs/theme/ADC/static/documentation.png']),
90 ('owner:test_admin def main', 72, [
90 ('owner:test_admin def main', 72, [
91 'vcs/__init__.py',
91 'vcs/__init__.py',
92 'vcs/tests/test_utils_filesize.py',
92 'vcs/tests/test_utils_filesize.py',
93 'vcs/tests/test_cli.py']),
93 'vcs/tests/test_cli.py']),
94 ('owner:michaΕ‚ test', 0, []),
94 ('owner:michaΕ‚ test', 0, []),
95 ])
95 ])
96 def test_search_files(self, query, expected_hits, expected_paths):
96 def test_search_files(self, query, expected_hits, expected_paths):
97 self.log_user()
97 self.log_user()
98 response = self.app.get(route_path('search'),
98 response = self.app.get(route_path('search'),
99 {'q': query, 'type': 'content', 'page_limit': 500})
99 {'q': query, 'type': 'content', 'page_limit': 500})
100
100
101 response.mustcontain('%s results' % expected_hits)
101 response.mustcontain('%s results' % expected_hits)
102 for path in expected_paths:
102 for path in expected_paths:
103 response.mustcontain(path)
103 response.mustcontain(path)
104
104
105 @pytest.mark.parametrize("query, expected_hits, expected_commits", [
105 @pytest.mark.parametrize("query, expected_hits, expected_commits", [
106 ('bother to ask where to fetch repo during tests', 3, [
106 ('bother to ask where to fetch repo during tests', 3, [
107 ('hg', 'a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1'),
107 ('hg', 'a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1'),
108 ('git', 'c6eb379775c578a95dad8ddab53f963b80894850'),
108 ('git', 'c6eb379775c578a95dad8ddab53f963b80894850'),
109 ('svn', '98')]),
109 ('svn', '98')]),
110 ('michaΕ‚', 0, []),
110 ('michaΕ‚', 0, []),
111 ('changed:tests/utils.py', 36, [
111 ('changed:tests/utils.py', 36, [
112 ('hg', 'a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1')]),
112 ('hg', 'a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1')]),
113 ('changed:vcs/utils/archivers.py', 11, [
113 ('changed:vcs/utils/archivers.py', 11, [
114 ('hg', '25213a5fbb048dff8ba65d21e466a835536e5b70'),
114 ('hg', '25213a5fbb048dff8ba65d21e466a835536e5b70'),
115 ('hg', '47aedd538bf616eedcb0e7d630ea476df0e159c7'),
115 ('hg', '47aedd538bf616eedcb0e7d630ea476df0e159c7'),
116 ('hg', 'f5d23247fad4856a1dabd5838afade1e0eed24fb'),
116 ('hg', 'f5d23247fad4856a1dabd5838afade1e0eed24fb'),
117 ('hg', '04ad456aefd6461aea24f90b63954b6b1ce07b3e'),
117 ('hg', '04ad456aefd6461aea24f90b63954b6b1ce07b3e'),
118 ('git', 'c994f0de03b2a0aa848a04fc2c0d7e737dba31fc'),
118 ('git', 'c994f0de03b2a0aa848a04fc2c0d7e737dba31fc'),
119 ('git', 'd1f898326327e20524fe22417c22d71064fe54a1'),
119 ('git', 'd1f898326327e20524fe22417c22d71064fe54a1'),
120 ('git', 'fe568b4081755c12abf6ba673ba777fc02a415f3'),
120 ('git', 'fe568b4081755c12abf6ba673ba777fc02a415f3'),
121 ('git', 'bafe786f0d8c2ff7da5c1dcfcfa577de0b5e92f1')]),
121 ('git', 'bafe786f0d8c2ff7da5c1dcfcfa577de0b5e92f1')]),
122 ('added:README.rst', 3, [
122 ('added:README.rst', 3, [
123 ('hg', '3803844fdbd3b711175fc3da9bdacfcd6d29a6fb'),
123 ('hg', '3803844fdbd3b711175fc3da9bdacfcd6d29a6fb'),
124 ('git', 'ff7ca51e58c505fec0dd2491de52c622bb7a806b'),
124 ('git', 'ff7ca51e58c505fec0dd2491de52c622bb7a806b'),
125 ('svn', '8')]),
125 ('svn', '8')]),
126 ('changed:lazy.py', 15, [
126 ('changed:lazy.py', 15, [
127 ('hg', 'eaa291c5e6ae6126a203059de9854ccf7b5baa12'),
127 ('hg', 'eaa291c5e6ae6126a203059de9854ccf7b5baa12'),
128 ('git', '17438a11f72b93f56d0e08e7d1fa79a378578a82'),
128 ('git', '17438a11f72b93f56d0e08e7d1fa79a378578a82'),
129 ('svn', '82'),
129 ('svn', '82'),
130 ('svn', '262'),
130 ('svn', '262'),
131 ('hg', 'f5d23247fad4856a1dabd5838afade1e0eed24fb'),
131 ('hg', 'f5d23247fad4856a1dabd5838afade1e0eed24fb'),
132 ('git', '33fa3223355104431402a888fa77a4e9956feb3e')
132 ('git', '33fa3223355104431402a888fa77a4e9956feb3e')
133 ]),
133 ]),
134 ('author:marcin@python-blog.com '
134 ('author:marcin@python-blog.com '
135 'commit_id:b986218ba1c9b0d6a259fac9b050b1724ed8e545', 1, [
135 'commit_id:b986218ba1c9b0d6a259fac9b050b1724ed8e545', 1, [
136 ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
136 ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
137 ('b986218ba1c9b0d6a259fac9b050b1724ed8e545', 1, [
137 ('b986218ba1c9b0d6a259fac9b050b1724ed8e545', 1, [
138 ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
138 ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
139 ('b986218b', 1, [
139 ('b986218b', 1, [
140 ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
140 ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
141 ])
141 ])
142 def test_search_commit_messages(
142 def test_search_commit_messages(
143 self, query, expected_hits, expected_commits, enabled_backends):
143 self, query, expected_hits, expected_commits, enabled_backends):
144 self.log_user()
144 self.log_user()
145 response = self.app.get(route_path('search'),
145 response = self.app.get(route_path('search'),
146 {'q': query, 'type': 'commit', 'page_limit': 500})
146 {'q': query, 'type': 'commit', 'page_limit': 500})
147
147
148 response.mustcontain('%s results' % expected_hits)
148 response.mustcontain('%s results' % expected_hits)
149 for backend, commit_id in expected_commits:
149 for backend, commit_id in expected_commits:
150 if backend in enabled_backends:
150 if backend in enabled_backends:
151 response.mustcontain(commit_id)
151 response.mustcontain(commit_id)
152
152
153 @pytest.mark.parametrize("query, expected_hits, expected_paths", [
153 @pytest.mark.parametrize("query, expected_hits, expected_paths", [
154 ('readme.rst', 3, []),
154 ('readme.rst', 3, []),
155 ('test*', 75, []),
155 ('test*', 75, []),
156 ('*model*', 1, []),
156 ('*model*', 1, []),
157 ('extension:rst', 48, []),
157 ('extension:rst', 48, []),
158 ('extension:rst api', 24, []),
158 ('extension:rst api', 24, []),
159 ])
159 ])
160 def test_search_file_paths(self, query, expected_hits, expected_paths):
160 def test_search_file_paths(self, query, expected_hits, expected_paths):
161 self.log_user()
161 self.log_user()
162 response = self.app.get(route_path('search'),
162 response = self.app.get(route_path('search'),
163 {'q': query, 'type': 'path', 'page_limit': 500})
163 {'q': query, 'type': 'path', 'page_limit': 500})
164
164
165 response.mustcontain('%s results' % expected_hits)
165 response.mustcontain('%s results' % expected_hits)
166 for path in expected_paths:
166 for path in expected_paths:
167 response.mustcontain(path)
167 response.mustcontain(path)
168
168
169 def test_search_commit_message_specific_repo(self, backend):
169 def test_search_commit_message_specific_repo(self, backend):
170 self.log_user()
170 self.log_user()
171 response = self.app.get(
171 response = self.app.get(
172 route_path('search_repo',repo_name=backend.repo_name),
172 route_path('search_repo',repo_name=backend.repo_name),
173 {'q': 'bother to ask where to fetch repo during tests',
173 {'q': 'bother to ask where to fetch repo during tests',
174 'type': 'commit'})
174 'type': 'commit'})
175
175
176 response.mustcontain('1 results')
176 response.mustcontain('1 results')
177
177
178 def test_filters_are_not_applied_for_admin_user(self):
178 def test_filters_are_not_applied_for_admin_user(self):
179 self.log_user()
179 self.log_user()
180 with mock.patch('whoosh.searching.Searcher.search') as search_mock:
180 with mock.patch('whoosh.searching.Searcher.search') as search_mock:
181
181 search_mock.return_value = mock.MagicMock(
182 scored_length=lambda: 100,
183 runtime=10
184 )
182 self.app.get(route_path('search'),
185 self.app.get(route_path('search'),
183 {'q': 'test query', 'type': 'commit'})
186 {'q': 'test query', 'type': 'commit'})
184 assert search_mock.call_count == 1
187 assert search_mock.call_count == 1
185 _, kwargs = search_mock.call_args
188 _, kwargs = search_mock.call_args
186 assert kwargs['filter'] is None
189 assert kwargs['filter'] is None
187
190
188 def test_filters_are_applied_for_normal_user(self, enabled_backends):
191 def test_filters_are_applied_for_normal_user(self, enabled_backends):
189 self.log_user(TEST_USER_REGULAR_LOGIN, TEST_USER_REGULAR_PASS)
192 self.log_user(TEST_USER_REGULAR_LOGIN, TEST_USER_REGULAR_PASS)
190 with mock.patch('whoosh.searching.Searcher.search') as search_mock:
193 with mock.patch('whoosh.searching.Searcher.search') as search_mock:
194 search_mock.return_value = mock.MagicMock(
195 scored_length=lambda: 100,
196 runtime=10
197 )
191 self.app.get(route_path('search'),
198 self.app.get(route_path('search'),
192 {'q': 'test query', 'type': 'commit'})
199 {'q': 'test query', 'type': 'commit'})
193 assert search_mock.call_count == 1
200 assert search_mock.call_count == 1
194 _, kwargs = search_mock.call_args
201 _, kwargs = search_mock.call_args
195 assert isinstance(kwargs['filter'], query.Or)
202 assert isinstance(kwargs['filter'], query.Or)
196 expected_repositories = [
203 expected_repositories = [
197 f'vcs_test_{b}' for b in enabled_backends]
204 f'vcs_test_{b}' for b in enabled_backends]
198 queried_repositories = [
205 queried_repositories = [
199 name for type_, name in kwargs['filter'].all_terms()]
206 name for type_, name in kwargs['filter'].all_terms()]
200 for repository in expected_repositories:
207 for repository in expected_repositories:
201 assert repository in queried_repositories
208 assert repository in queried_repositories
@@ -1,311 +1,311 b''
1
1
2
2
3 # Copyright (C) 2012-2023 RhodeCode GmbH
3 # Copyright (C) 2012-2023 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21 """
21 """
22 Index schema for RhodeCode
22 Index schema for RhodeCode
23 """
23 """
24
24
25
25
26 import os
26 import os
27 import re
27 import re
28 import logging
28 import logging
29
29
30 from whoosh import query as query_lib
30 from whoosh import query as query_lib
31 from whoosh.highlight import HtmlFormatter, ContextFragmenter
31 from whoosh.highlight import HtmlFormatter, ContextFragmenter
32 from whoosh.index import create_in, open_dir, exists_in, EmptyIndexError
32 from whoosh.index import create_in, open_dir, exists_in, EmptyIndexError
33 from whoosh.qparser import QueryParser, QueryParserError
33 from whoosh.qparser import QueryParser, QueryParserError
34
34
35 import rhodecode.lib.helpers as h
35 import rhodecode.lib.helpers as h
36 from rhodecode.lib.index import BaseSearcher
36 from rhodecode.lib.index import BaseSearcher
37 from rhodecode.lib.str_utils import safe_str
37 from rhodecode.lib.str_utils import safe_str
38
38
39 log = logging.getLogger(__name__)
39 log = logging.getLogger(__name__)
40
40
41
41
42 try:
42 try:
43 # we first try to import from rhodecode tools, fallback to copies if
43 # we first try to import from rhodecode tools, fallback to copies if
44 # we're unable to
44 # we're unable to
45 from rhodecode_tools.lib.fts_index.whoosh_schema import (
45 from rhodecode_tools.lib.fts_index.whoosh_schema import (
46 ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
46 ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
47 COMMIT_SCHEMA)
47 COMMIT_SCHEMA)
48 except ImportError:
48 except ImportError:
49 log.warning('rhodecode_tools schema not available, doing a fallback '
49 log.warning('rhodecode_tools schema not available, doing a fallback '
50 'import from `rhodecode.lib.index.whoosh_fallback_schema`')
50 'import from `rhodecode.lib.index.whoosh_fallback_schema`')
51 from rhodecode.lib.index.whoosh_fallback_schema import (
51 from rhodecode.lib.index.whoosh_fallback_schema import (
52 ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
52 ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
53 COMMIT_SCHEMA)
53 COMMIT_SCHEMA)
54
54
55
55
56 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
56 FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
57 FRAGMENTER = ContextFragmenter(200)
57 FRAGMENTER = ContextFragmenter(200)
58
58
59 log = logging.getLogger(__name__)
59 log = logging.getLogger(__name__)
60
60
61
61
62 class WhooshSearcher(BaseSearcher):
62 class WhooshSearcher(BaseSearcher):
63 # this also shows in UI
63 # this also shows in UI
64 query_lang_doc = 'http://whoosh.readthedocs.io/en/latest/querylang.html'
64 query_lang_doc = 'http://whoosh.readthedocs.io/en/latest/querylang.html'
65 name = 'whoosh'
65 name = 'whoosh'
66
66
67 def __init__(self, config):
67 def __init__(self, config):
68 super(Searcher, self).__init__()
68 super(Searcher, self).__init__()
69 self.config = config
69 self.config = config
70 if not os.path.isdir(self.config['location']):
70 if not os.path.isdir(self.config['location']):
71 os.makedirs(self.config['location'])
71 os.makedirs(self.config['location'])
72
72
73 opener = create_in
73 opener = create_in
74 if exists_in(self.config['location'], indexname=FILE_INDEX_NAME):
74 if exists_in(self.config['location'], indexname=FILE_INDEX_NAME):
75 opener = open_dir
75 opener = open_dir
76 file_index = opener(self.config['location'], schema=FILE_SCHEMA,
76 file_index = opener(self.config['location'], schema=FILE_SCHEMA,
77 indexname=FILE_INDEX_NAME)
77 indexname=FILE_INDEX_NAME)
78
78
79 opener = create_in
79 opener = create_in
80 if exists_in(self.config['location'], indexname=COMMIT_INDEX_NAME):
80 if exists_in(self.config['location'], indexname=COMMIT_INDEX_NAME):
81 opener = open_dir
81 opener = open_dir
82 changeset_index = opener(self.config['location'], schema=COMMIT_SCHEMA,
82 changeset_index = opener(self.config['location'], schema=COMMIT_SCHEMA,
83 indexname=COMMIT_INDEX_NAME)
83 indexname=COMMIT_INDEX_NAME)
84
84
85 self.commit_schema = COMMIT_SCHEMA
85 self.commit_schema = COMMIT_SCHEMA
86 self.commit_index = changeset_index
86 self.commit_index = changeset_index
87 self.file_schema = FILE_SCHEMA
87 self.file_schema = FILE_SCHEMA
88 self.file_index = file_index
88 self.file_index = file_index
89 self.searcher = None
89 self.searcher = None
90
90
    def cleanup(self):
        """Release the whoosh searcher opened by ``_init_searcher``, if any."""
        # ``self.searcher`` starts as None in __init__; closing frees the
        # index file handles held by the reader.
        if self.searcher:
            self.searcher.close()
94
94
95 def _extend_query(self, query):
95 def _extend_query(self, query):
96 hashes = re.compile('([0-9a-f]{5,40})').findall(query)
96 hashes = re.compile('([0-9a-f]{5,40})').findall(query)
97 if hashes:
97 if hashes:
98 hashes_or_query = ' OR '.join('commit_id:%s*' % h for h in hashes)
98 hashes_or_query = ' OR '.join('commit_id:%s*' % h for h in hashes)
99 query = u'(%s) OR %s' % (query, hashes_or_query)
99 query = u'(%s) OR %s' % (query, hashes_or_query)
100 return query
100 return query
101
101
102 def sort_def(self, search_type, direction, sort_field):
102 def sort_def(self, search_type, direction, sort_field):
103
103
104 if search_type == 'commit':
104 if search_type == 'commit':
105 field_defs = {
105 field_defs = {
106 'message': 'message',
106 'message': 'message',
107 'date': 'date',
107 'date': 'date',
108 'author_email': 'author',
108 'author_email': 'author',
109 }
109 }
110 elif search_type == 'path':
110 elif search_type == 'path':
111 field_defs = {
111 field_defs = {
112 'file': 'path',
112 'file': 'path',
113 'size': 'size',
113 'size': 'size',
114 'lines': 'lines',
114 'lines': 'lines',
115 }
115 }
116 elif search_type == 'content':
116 elif search_type == 'content':
117 # NOTE(dan): content doesn't support any sorting
117 # NOTE(dan): content doesn't support any sorting
118 field_defs = {}
118 field_defs = {}
119 else:
119 else:
120 return ''
120 return ''
121
121
122 if sort_field in field_defs:
122 if sort_field in field_defs:
123 return field_defs[sort_field]
123 return field_defs[sort_field]
124
124
    def search(self, query, document_type, search_user,
               repo_name=None, repo_group_name=None,
               requested_page=1, page_limit=10, sort=None, raise_on_exc=True):
        """Run a whoosh query and return a result dict.

        :param query: raw user query; hash-looking tokens are OR-extended
            into ``commit_id`` prefix terms via ``_extend_query``.
        :param document_type: 'content', 'commit', 'path' or 'repository';
            selects the search field, index and schema.
        :param search_user: auth user whose repository permissions build the
            whoosh results filter (``None`` filter for global admins).
        :param repo_name: optional single repository to restrict results to.
        :param repo_group_name: accepted for interface compatibility with
            other searcher backends; not used here.
        :param requested_page: not applied in this method (``limit=None``
            fetches all hits) -- presumably paging happens in the caller;
            TODO confirm.
        :param page_limit: see ``requested_page`` -- not applied here.
        :param sort: sort spec consumed by ``self.get_sort``.
        :param raise_on_exc: accepted for interface compatibility; errors
            are always swallowed into ``result['error']`` in this body.
        :return: dict with keys 'results' (WhooshResultWrapper), 'count',
            'error' (message or None) and 'runtime'.
        """

        # kept for potential debugging; the parsed form replaces ``query``
        original_query = query
        query = self._extend_query(query)

        log.debug('QUERY: %s on %s', query, document_type)
        result = {
            'results': [],
            'count': 0,
            'error': None,
            'runtime': 0
        }
        search_type, index_name, schema_defn = self._prepare_for_search(
            document_type)
        self._init_searcher(index_name)
        try:
            qp = QueryParser(search_type, schema=schema_defn)
            allowed_repos_filter = self._get_repo_filter(
                search_user, repo_name)
            try:
                query = qp.parse(safe_str(query))
                log.debug('query: %s (%s)', query, repr(query))

                # resolve optional sorting into whoosh's sortedby/reverse
                reverse, sorted_by = False, None
                direction, sort_field = self.get_sort(search_type, sort)
                if sort_field:
                    sort_definition = self.sort_def(search_type, direction, sort_field)
                    if sort_definition:
                        sorted_by = sort_definition
                        if direction == Searcher.DIRECTION_DESC:
                            reverse = True
                        if direction == Searcher.DIRECTION_ASC:
                            reverse = False

                whoosh_results = self.searcher.search(
                    query, filter=allowed_repos_filter, limit=None,
                    sortedby=sorted_by, reverse=reverse)

                # fixes for 32k limit that whoosh uses for highlight
                whoosh_results.fragmenter.charlimit = None
                res_ln = whoosh_results.scored_length()
                result['runtime'] = whoosh_results.runtime
                result['count'] = res_ln
                result['results'] = WhooshResultWrapper(
                    search_type, res_ln, whoosh_results)

            except QueryParserError:
                # user-facing message; the raw parse error is not useful
                result['error'] = 'Invalid search query. Try quoting it.'
        except (EmptyIndexError, IOError, OSError):
            # the index directory is missing, unreadable or empty
            msg = 'There is no index to search in. Please run whoosh indexer'
            log.exception(msg)
            result['error'] = msg
        except Exception:
            msg = 'An error occurred during this search operation'
            log.exception(msg)
            result['error'] = msg

        return result
185
185
186 def statistics(self, translator):
186 def statistics(self, translator):
187 _ = translator
187 _ = translator
188 stats = [
188 stats = [
189 {'key': _('Index Type'), 'value': 'Whoosh'},
189 {'key': _('Index Type'), 'value': 'Whoosh'},
190 {'sep': True},
190 {'sep': True},
191
191
192 {'key': _('File Index'), 'value': str(self.file_index)},
192 {'key': _('File Index'), 'value': str(self.file_index)},
193 {'key': _('Indexed documents'), 'value': self.file_index.doc_count()},
193 {'key': _('Indexed documents'), 'value': self.file_index.doc_count()},
194 {'key': _('Last update'), 'value': h.time_to_datetime(self.file_index.last_modified())},
194 {'key': _('Last update'), 'value': h.time_to_datetime(self.file_index.last_modified())},
195
195
196 {'sep': True},
196 {'sep': True},
197
197
198 {'key': _('Commit index'), 'value': str(self.commit_index)},
198 {'key': _('Commit index'), 'value': str(self.commit_index)},
199 {'key': _('Indexed documents'), 'value': str(self.commit_index.doc_count())},
199 {'key': _('Indexed documents'), 'value': str(self.commit_index.doc_count())},
200 {'key': _('Last update'), 'value': h.time_to_datetime(self.commit_index.last_modified())}
200 {'key': _('Last update'), 'value': h.time_to_datetime(self.commit_index.last_modified())}
201 ]
201 ]
202 return stats
202 return stats
203
203
204 def _get_repo_filter(self, auth_user, repo_name):
204 def _get_repo_filter(self, auth_user, repo_name):
205
205
206 allowed_to_search = [
206 allowed_to_search = [
207 repo for repo, perm in
207 repo for repo, perm in
208 auth_user.permissions['repositories'].items()
208 auth_user.permissions['repositories'].items()
209 if perm != 'repository.none']
209 if perm != 'repository.none']
210
210
211 if repo_name:
211 if repo_name:
212 repo_filter = [query_lib.Term('repository', repo_name)]
212 repo_filter = [query_lib.Term('repository', repo_name)]
213
213
214 elif 'hg.admin' in auth_user.permissions.get('global', []):
214 elif 'hg.admin' in auth_user.permissions.get('global', []):
215 return None
215 return None
216
216
217 else:
217 else:
218 repo_filter = [query_lib.Term('repository', _rn)
218 repo_filter = [query_lib.Term('repository', _rn)
219 for _rn in allowed_to_search]
219 for _rn in allowed_to_search]
220 # in case we're not allowed to search anywhere, it's a trick
220 # in case we're not allowed to search anywhere, it's a trick
221 # to tell whoosh we're filtering, on ALL results
221 # to tell whoosh we're filtering, on ALL results
222 repo_filter = repo_filter or [query_lib.Term('repository', '')]
222 repo_filter = repo_filter or [query_lib.Term('repository', '')]
223
223
224 return query_lib.Or(repo_filter)
224 return query_lib.Or(repo_filter)
225
225
226 def _prepare_for_search(self, cur_type):
226 def _prepare_for_search(self, cur_type):
227 search_type = {
227 search_type = {
228 'content': 'content',
228 'content': 'content',
229 'commit': 'message',
229 'commit': 'message',
230 'path': 'path',
230 'path': 'path',
231 'repository': 'repository'
231 'repository': 'repository'
232 }.get(cur_type, 'content')
232 }.get(cur_type, 'content')
233
233
234 index_name = {
234 index_name = {
235 'content': FILE_INDEX_NAME,
235 'content': FILE_INDEX_NAME,
236 'commit': COMMIT_INDEX_NAME,
236 'commit': COMMIT_INDEX_NAME,
237 'path': FILE_INDEX_NAME
237 'path': FILE_INDEX_NAME
238 }.get(cur_type, FILE_INDEX_NAME)
238 }.get(cur_type, FILE_INDEX_NAME)
239
239
240 schema_defn = {
240 schema_defn = {
241 'content': self.file_schema,
241 'content': self.file_schema,
242 'commit': self.commit_schema,
242 'commit': self.commit_schema,
243 'path': self.file_schema
243 'path': self.file_schema
244 }.get(cur_type, self.file_schema)
244 }.get(cur_type, self.file_schema)
245
245
246 log.debug('IDX: %s', index_name)
246 log.debug('IDX: %s', index_name)
247 log.debug('SCHEMA: %s', schema_defn)
247 log.debug('SCHEMA: %s', schema_defn)
248 return search_type, index_name, schema_defn
248 return search_type, index_name, schema_defn
249
249
250 def _init_searcher(self, index_name):
250 def _init_searcher(self, index_name):
251 idx = open_dir(self.config['location'], indexname=index_name)
251 idx = open_dir(self.config['location'], indexname=index_name)
252 self.searcher = idx.searcher()
252 self.searcher = idx.searcher()
253 return self.searcher
253 return self.searcher
254
254
255
255
# Generic alias: callers (and this module's own ``search`` method) refer to
# the active backend class by the name ``Searcher``.
Searcher = WhooshSearcher
257
257
258
258
class WhooshResultWrapper(object):
    """Lazy, sliceable wrapper over a whoosh result set.

    Each hit is rendered into a plain dict by :meth:`get_full_content`
    on access; ``len()`` reports the total hit count.
    """

    def __init__(self, search_type, total_hits, results):
        self.search_type = search_type
        self.results = results
        self.total_hits = total_hits

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, len(self))

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return self.total_hits

    def __iter__(self):
        """
        Allows Iteration over results,and lazy generate content

        *Requires* implementation of ``__getitem__`` method.
        """
        for hit in self.results:
            yield self.get_full_content(hit)

    def __getitem__(self, key):
        """
        Slicing of resultWrapper
        """
        start, stop = key.start, key.stop
        for hit in self.results[start:stop]:
            yield self.get_full_content(hit)

    def get_full_content(self, hit):
        # TODO: marcink: this feels like an overkill, there's a lot of data
        # inside hit object, and we don't need all
        res = dict(hit)
        # elastic search uses that, we set it empty so it fallbacks to regular HL logic
        res['content_highlight'] = ''

        f_path = ''  # pragma: no cover
        if self.search_type in ('content', 'path'):
            # strip the leading "<repository>/" prefix from the stored path
            repo_prefix_len = len(res['repository'])
            f_path = res['path'][repo_prefix_len:].lstrip(os.sep)

        if self.search_type == 'content':
            res['content_short_hl'] = hit.highlights('content')
            res['f_path'] = f_path
        elif self.search_type == 'path':
            res['f_path'] = f_path
        elif self.search_type == 'message':
            res['message_hl'] = hit.highlights('message')

        return res
General Comments 0
You need to be logged in to leave comments. Login now