@@ -1,201 +1,208 @@
# Copyright (C) 2010-2023 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import os

import mock
import pytest
from whoosh import query

from rhodecode.tests import (
    TestController, route_path_generator, HG_REPO,
    TEST_USER_REGULAR_LOGIN, TEST_USER_REGULAR_PASS)
from rhodecode.tests.utils import AssertResponse


def route_path(name, params=None, **kwargs):
    from rhodecode.apps._base import ADMIN_PREFIX
    url_defs = {
        'search':
            ADMIN_PREFIX + '/search',
        'search_repo':
            '/{repo_name}/search',
    }
    return route_path_generator(url_defs, name=name, params=params, **kwargs)


class TestSearchController(TestController):

    def test_index(self):
        self.log_user()
        response = self.app.get(route_path('search'))
        assert_response = response.assert_response()
        assert_response.one_element_exists('input#q')

    def test_search_files_empty_search(self):
        if os.path.isdir(self.index_location):
            pytest.skip('skipped due to existing index')
        else:
            self.log_user()
            response = self.app.get(route_path('search'),
                                    {'q': HG_REPO})
            response.mustcontain('There is no index to search in. '
                                 'Please run whoosh indexer')

    def test_search_validation(self):
        self.log_user()
        response = self.app.get(route_path('search'),
                                {'q': query, 'type': 'content', 'page_limit': 1000})

        response.mustcontain(
            'page_limit - 1000 is greater than maximum value 500')

    @pytest.mark.parametrize("query, expected_hits, expected_paths", [
        ('todo', 23, [
            'vcs/backends/hg/inmemory.py',
            'vcs/tests/test_git.py']),
        ('extension:rst installation', 6, [
            'docs/index.rst',
            'docs/installation.rst']),
        ('def repo', 87, [
            'vcs/tests/test_git.py',
            'vcs/tests/test_changesets.py']),
        ('repository:%s def test' % HG_REPO, 18, [
            'vcs/tests/test_git.py',
            'vcs/tests/test_changesets.py']),
        ('"def main"', 9, [
            'vcs/__init__.py',
            'vcs/tests/__init__.py',
            'vcs/utils/progressbar.py']),
        ('owner:test_admin', 358, [
            'vcs/tests/base.py',
            'MANIFEST.in',
            'vcs/utils/termcolors.py',
            'docs/theme/ADC/static/documentation.png']),
        ('owner:test_admin def main', 72, [
            'vcs/__init__.py',
            'vcs/tests/test_utils_filesize.py',
            'vcs/tests/test_cli.py']),
        ('owner:michał test', 0, []),
    ])
    def test_search_files(self, query, expected_hits, expected_paths):
        self.log_user()
        response = self.app.get(route_path('search'),
                                {'q': query, 'type': 'content', 'page_limit': 500})

        response.mustcontain('%s results' % expected_hits)
        for path in expected_paths:
            response.mustcontain(path)

    @pytest.mark.parametrize("query, expected_hits, expected_commits", [
        ('bother to ask where to fetch repo during tests', 3, [
            ('hg', 'a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1'),
            ('git', 'c6eb379775c578a95dad8ddab53f963b80894850'),
            ('svn', '98')]),
        ('michał', 0, []),
        ('changed:tests/utils.py', 36, [
            ('hg', 'a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1')]),
        ('changed:vcs/utils/archivers.py', 11, [
            ('hg', '25213a5fbb048dff8ba65d21e466a835536e5b70'),
            ('hg', '47aedd538bf616eedcb0e7d630ea476df0e159c7'),
            ('hg', 'f5d23247fad4856a1dabd5838afade1e0eed24fb'),
            ('hg', '04ad456aefd6461aea24f90b63954b6b1ce07b3e'),
            ('git', 'c994f0de03b2a0aa848a04fc2c0d7e737dba31fc'),
            ('git', 'd1f898326327e20524fe22417c22d71064fe54a1'),
            ('git', 'fe568b4081755c12abf6ba673ba777fc02a415f3'),
            ('git', 'bafe786f0d8c2ff7da5c1dcfcfa577de0b5e92f1')]),
        ('added:README.rst', 3, [
            ('hg', '3803844fdbd3b711175fc3da9bdacfcd6d29a6fb'),
            ('git', 'ff7ca51e58c505fec0dd2491de52c622bb7a806b'),
            ('svn', '8')]),
        ('changed:lazy.py', 15, [
            ('hg', 'eaa291c5e6ae6126a203059de9854ccf7b5baa12'),
            ('git', '17438a11f72b93f56d0e08e7d1fa79a378578a82'),
            ('svn', '82'),
            ('svn', '262'),
            ('hg', 'f5d23247fad4856a1dabd5838afade1e0eed24fb'),
            ('git', '33fa3223355104431402a888fa77a4e9956feb3e')
        ]),
        ('author:marcin@python-blog.com '
         'commit_id:b986218ba1c9b0d6a259fac9b050b1724ed8e545', 1, [
            ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
        ('b986218ba1c9b0d6a259fac9b050b1724ed8e545', 1, [
            ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
        ('b986218b', 1, [
            ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
    ])
    def test_search_commit_messages(
            self, query, expected_hits, expected_commits, enabled_backends):
        self.log_user()
        response = self.app.get(route_path('search'),
                                {'q': query, 'type': 'commit', 'page_limit': 500})

        response.mustcontain('%s results' % expected_hits)
        for backend, commit_id in expected_commits:
            if backend in enabled_backends:
                response.mustcontain(commit_id)

    @pytest.mark.parametrize("query, expected_hits, expected_paths", [
        ('readme.rst', 3, []),
        ('test*', 75, []),
        ('*model*', 1, []),
        ('extension:rst', 48, []),
        ('extension:rst api', 24, []),
    ])
    def test_search_file_paths(self, query, expected_hits, expected_paths):
        self.log_user()
        response = self.app.get(route_path('search'),
                                {'q': query, 'type': 'path', 'page_limit': 500})

        response.mustcontain('%s results' % expected_hits)
        for path in expected_paths:
            response.mustcontain(path)

    def test_search_commit_message_specific_repo(self, backend):
        self.log_user()
        response = self.app.get(
            route_path('search_repo', repo_name=backend.repo_name),
            {'q': 'bother to ask where to fetch repo during tests',
             'type': 'commit'})

        response.mustcontain('1 results')

    def test_filters_are_not_applied_for_admin_user(self):
        self.log_user()
        with mock.patch('whoosh.searching.Searcher.search') as search_mock:
            search_mock.return_value = mock.MagicMock(
                scored_length=lambda: 100,
                runtime=10
            )
            self.app.get(route_path('search'),
                         {'q': 'test query', 'type': 'commit'})
            assert search_mock.call_count == 1
            _, kwargs = search_mock.call_args
            assert kwargs['filter'] is None

    def test_filters_are_applied_for_normal_user(self, enabled_backends):
        self.log_user(TEST_USER_REGULAR_LOGIN, TEST_USER_REGULAR_PASS)
        with mock.patch('whoosh.searching.Searcher.search') as search_mock:
            search_mock.return_value = mock.MagicMock(
                scored_length=lambda: 100,
                runtime=10
            )
            self.app.get(route_path('search'),
                         {'q': 'test query', 'type': 'commit'})
            assert search_mock.call_count == 1
            _, kwargs = search_mock.call_args
            assert isinstance(kwargs['filter'], query.Or)
            expected_repositories = [
                f'vcs_test_{b}' for b in enabled_backends]
            queried_repositories = [
                name for type_, name in kwargs['filter'].all_terms()]
            for repository in expected_repositories:
                assert repository in queried_repositories
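The `search_mock.return_value = mock.MagicMock(scored_length=lambda: 100, runtime=10)` stubs added in the two filter tests above supply exactly the two members that `WhooshSearcher.search()` (second file below) reads from the Whoosh results object: it calls `whoosh_results.scored_length()` for the hit count and reads the `whoosh_results.runtime` attribute. A minimal stand-alone sketch of that stub contract; the values 100 and 10 are arbitrary placeholders:

import mock  # same external `mock` package the test module imports

# Stand-in for a whoosh results object: scored_length must be callable,
# runtime is read as a plain attribute.
fake_results = mock.MagicMock(scored_length=lambda: 100, runtime=10)

assert fake_results.scored_length() == 100  # becomes result['count']
assert fake_results.runtime == 10           # becomes result['runtime']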
@@ -1,311 +1,311 @@


# Copyright (C) 2012-2023 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

"""
Index schema for RhodeCode
"""


import os
import re
import logging

from whoosh import query as query_lib
from whoosh.highlight import HtmlFormatter, ContextFragmenter
from whoosh.index import create_in, open_dir, exists_in, EmptyIndexError
from whoosh.qparser import QueryParser, QueryParserError

import rhodecode.lib.helpers as h
from rhodecode.lib.index import BaseSearcher
from rhodecode.lib.str_utils import safe_str

log = logging.getLogger(__name__)


try:
    # we first try to import from rhodecode tools, fallback to copies if
    # we're unable to
    from rhodecode_tools.lib.fts_index.whoosh_schema import (
        ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
        COMMIT_SCHEMA)
except ImportError:
    log.warning('rhodecode_tools schema not available, doing a fallback '
                'import from `rhodecode.lib.index.whoosh_fallback_schema`')
    from rhodecode.lib.index.whoosh_fallback_schema import (
        ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
        COMMIT_SCHEMA)


FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
FRAGMENTER = ContextFragmenter(200)

log = logging.getLogger(__name__)


class WhooshSearcher(BaseSearcher):
    # this also shows in UI
    query_lang_doc = 'http://whoosh.readthedocs.io/en/latest/querylang.html'
    name = 'whoosh'

    def __init__(self, config):
        super(Searcher, self).__init__()
        self.config = config
        if not os.path.isdir(self.config['location']):
            os.makedirs(self.config['location'])

        opener = create_in
        if exists_in(self.config['location'], indexname=FILE_INDEX_NAME):
            opener = open_dir
        file_index = opener(self.config['location'], schema=FILE_SCHEMA,
                            indexname=FILE_INDEX_NAME)

        opener = create_in
        if exists_in(self.config['location'], indexname=COMMIT_INDEX_NAME):
            opener = open_dir
        changeset_index = opener(self.config['location'], schema=COMMIT_SCHEMA,
                                 indexname=COMMIT_INDEX_NAME)

        self.commit_schema = COMMIT_SCHEMA
        self.commit_index = changeset_index
        self.file_schema = FILE_SCHEMA
        self.file_index = file_index
        self.searcher = None

    def cleanup(self):
        if self.searcher:
            self.searcher.close()

    def _extend_query(self, query):
        hashes = re.compile('([0-9a-f]{5,40})').findall(query)
        if hashes:
            hashes_or_query = ' OR '.join('commit_id:%s*' % h for h in hashes)
            query = u'(%s) OR %s' % (query, hashes_or_query)
        return query

    def sort_def(self, search_type, direction, sort_field):

        if search_type == 'commit':
            field_defs = {
                'message': 'message',
                'date': 'date',
                'author_email': 'author',
            }
        elif search_type == 'path':
            field_defs = {
                'file': 'path',
                'size': 'size',
                'lines': 'lines',
            }
        elif search_type == 'content':
            # NOTE(dan): content doesn't support any sorting
            field_defs = {}
        else:
            return ''

        if sort_field in field_defs:
            return field_defs[sort_field]

    def search(self, query, document_type, search_user,
               repo_name=None, repo_group_name=None,
               requested_page=1, page_limit=10, sort=None, raise_on_exc=True):

        original_query = query
        query = self._extend_query(query)

        log.debug('QUERY: %s on %s', query, document_type)
        result = {
            'results': [],
            'count': 0,
            'error': None,
            'runtime': 0
        }
        search_type, index_name, schema_defn = self._prepare_for_search(
            document_type)
        self._init_searcher(index_name)
        try:
            qp = QueryParser(search_type, schema=schema_defn)
            allowed_repos_filter = self._get_repo_filter(
                search_user, repo_name)
            try:
                query = qp.parse(safe_str(query))
                log.debug('query: %s (%s)', query, repr(query))

                reverse, sorted_by = False, None
                direction, sort_field = self.get_sort(search_type, sort)
                if sort_field:
                    sort_definition = self.sort_def(search_type, direction, sort_field)
                    if sort_definition:
                        sorted_by = sort_definition
                        if direction == Searcher.DIRECTION_DESC:
                            reverse = True
                        if direction == Searcher.DIRECTION_ASC:
                            reverse = False

                whoosh_results = self.searcher.search(
                    query, filter=allowed_repos_filter, limit=None,
                    sortedby=sorted_by, reverse=reverse)

                # fixes for 32k limit that whoosh uses for highlight
                whoosh_results.fragmenter.charlimit = None
                res_ln = whoosh_results.scored_length()
                result['runtime'] = whoosh_results.runtime
                result['count'] = res_ln
                result['results'] = WhooshResultWrapper(
                    search_type, res_ln, whoosh_results)

            except QueryParserError:
                result['error'] = 'Invalid search query. Try quoting it.'
        except (EmptyIndexError, IOError, OSError):
            msg = 'There is no index to search in. Please run whoosh indexer'
            log.exception(msg)
            result['error'] = msg
        except Exception:
            msg = 'An error occurred during this search operation'
            log.exception(msg)
            result['error'] = msg

        return result

    def statistics(self, translator):
        _ = translator
        stats = [
            {'key': _('Index Type'), 'value': 'Whoosh'},
            {'sep': True},

            {'key': _('File Index'), 'value': str(self.file_index)},
            {'key': _('Indexed documents'), 'value': self.file_index.doc_count()},
            {'key': _('Last update'), 'value': h.time_to_datetime(self.file_index.last_modified())},

            {'sep': True},

            {'key': _('Commit index'), 'value': str(self.commit_index)},
            {'key': _('Indexed documents'), 'value': str(self.commit_index.doc_count())},
            {'key': _('Last update'), 'value': h.time_to_datetime(self.commit_index.last_modified())}
        ]
        return stats

    def _get_repo_filter(self, auth_user, repo_name):

        allowed_to_search = [
            repo for repo, perm in
            auth_user.permissions['repositories'].items()
            if perm != 'repository.none']

        if repo_name:
            repo_filter = [query_lib.Term('repository', repo_name)]

        elif 'hg.admin' in auth_user.permissions.get('global', []):
            return None

        else:
            repo_filter = [query_lib.Term('repository', _rn)
                           for _rn in allowed_to_search]
            # in case we're not allowed to search anywhere, it's a trick
            # to tell whoosh we're filtering, on ALL results
            repo_filter = repo_filter or [query_lib.Term('repository', '')]

        return query_lib.Or(repo_filter)

    def _prepare_for_search(self, cur_type):
        search_type = {
            'content': 'content',
            'commit': 'message',
            'path': 'path',
            'repository': 'repository'
        }.get(cur_type, 'content')

        index_name = {
            'content': FILE_INDEX_NAME,
            'commit': COMMIT_INDEX_NAME,
            'path': FILE_INDEX_NAME
        }.get(cur_type, FILE_INDEX_NAME)

        schema_defn = {
            'content': self.file_schema,
            'commit': self.commit_schema,
            'path': self.file_schema
        }.get(cur_type, self.file_schema)

        log.debug('IDX: %s', index_name)
        log.debug('SCHEMA: %s', schema_defn)
        return search_type, index_name, schema_defn

    def _init_searcher(self, index_name):
        idx = open_dir(self.config['location'], indexname=index_name)
        self.searcher = idx.searcher()
        return self.searcher


Searcher = WhooshSearcher


class WhooshResultWrapper(object):
    def __init__(self, search_type, total_hits, results):
        self.search_type = search_type
        self.results = results
        self.total_hits = total_hits

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, len(self))

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return self.total_hits

    def __iter__(self):
        """
        Allows iteration over results, and lazily generates content.

        *Requires* implementation of ``__getitem__`` method.
        """
        for hit in self.results:
            yield self.get_full_content(hit)

    def __getitem__(self, key):
        """
        Slicing of resultWrapper
        """
        i, j = key.start, key.stop
        for hit in self.results[i:j]:
            yield self.get_full_content(hit)

    def get_full_content(self, hit):
        # TODO: marcink: this feels like an overkill, there's a lot of data
        # inside hit object, and we don't need all
        res = dict(hit)
        # elastic search uses that, we set it empty so it falls back to regular HL logic
        res['content_highlight'] = ''

        f_path = ''  # pragma: no cover
        if self.search_type in ['content', 'path']:
            f_path = res['path'][len(res['repository']):]
            f_path = f_path.lstrip(os.sep)

        if self.search_type == 'content':
            res.update({'content_short_hl': hit.highlights('content'),
                        'f_path': f_path})
        elif self.search_type == 'path':
            res.update({'f_path': f_path})
        elif self.search_type == 'message':
            res.update({'message_hl': hit.highlights('message')})

        return res
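`_extend_query()` above is what lets the bare short hash `'b986218b'` in the first file's commit-message tests resolve to a full commit: any hex run of 5 to 40 characters in the query is OR-ed back in as a `commit_id:<hash>*` prefix term before parsing. A stand-alone illustration of the same rewrite (plain Python, no Whoosh needed; the sample query is taken from the test data above):

import re

def extend_query(query):
    # mirrors WhooshSearcher._extend_query: hex runs of 5-40 chars are also
    # searched as commit_id prefixes
    hashes = re.compile('([0-9a-f]{5,40})').findall(query)
    if hashes:
        hashes_or_query = ' OR '.join('commit_id:%s*' % h for h in hashes)
        query = '(%s) OR %s' % (query, hashes_or_query)
    return query

print(extend_query('b986218b'))
# prints: (b986218b) OR commit_id:b986218b*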