@@ -1,201 +1,208 @@
# Copyright (C) 2010-2023 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import os

import mock
import pytest
from whoosh import query

from rhodecode.tests import (
    TestController, route_path_generator, HG_REPO,
    TEST_USER_REGULAR_LOGIN, TEST_USER_REGULAR_PASS)
from rhodecode.tests.utils import AssertResponse


def route_path(name, params=None, **kwargs):
    from rhodecode.apps._base import ADMIN_PREFIX
    url_defs = {
        'search':
            ADMIN_PREFIX + '/search',
        'search_repo':
            '/{repo_name}/search',
    }
    return route_path_generator(url_defs, name=name, params=params, **kwargs)


class TestSearchController(TestController):

    def test_index(self):
        self.log_user()
        response = self.app.get(route_path('search'))
        assert_response = response.assert_response()
        assert_response.one_element_exists('input#q')

    def test_search_files_empty_search(self):
        if os.path.isdir(self.index_location):
            pytest.skip('skipped due to existing index')
        else:
            self.log_user()
            response = self.app.get(route_path('search'),
                                    {'q': HG_REPO})
            response.mustcontain('There is no index to search in. '
                                 'Please run whoosh indexer')

    def test_search_validation(self):
        self.log_user()
        response = self.app.get(route_path('search'),
                                {'q': query, 'type': 'content', 'page_limit': 1000})

        response.mustcontain(
            'page_limit - 1000 is greater than maximum value 500')

    @pytest.mark.parametrize("query, expected_hits, expected_paths", [
        ('todo', 23, [
            'vcs/backends/hg/inmemory.py',
            'vcs/tests/test_git.py']),
        ('extension:rst installation', 6, [
            'docs/index.rst',
            'docs/installation.rst']),
        ('def repo', 87, [
            'vcs/tests/test_git.py',
            'vcs/tests/test_changesets.py']),
        ('repository:%s def test' % HG_REPO, 18, [
            'vcs/tests/test_git.py',
            'vcs/tests/test_changesets.py']),
        ('"def main"', 9, [
            'vcs/__init__.py',
            'vcs/tests/__init__.py',
            'vcs/utils/progressbar.py']),
        ('owner:test_admin', 358, [
            'vcs/tests/base.py',
            'MANIFEST.in',
            'vcs/utils/termcolors.py',
            'docs/theme/ADC/static/documentation.png']),
        ('owner:test_admin def main', 72, [
            'vcs/__init__.py',
            'vcs/tests/test_utils_filesize.py',
            'vcs/tests/test_cli.py']),
        ('owner:michał test', 0, []),
    ])
    def test_search_files(self, query, expected_hits, expected_paths):
        self.log_user()
        response = self.app.get(route_path('search'),
                                {'q': query, 'type': 'content', 'page_limit': 500})

        response.mustcontain('%s results' % expected_hits)
        for path in expected_paths:
            response.mustcontain(path)

    @pytest.mark.parametrize("query, expected_hits, expected_commits", [
        ('bother to ask where to fetch repo during tests', 3, [
            ('hg', 'a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1'),
            ('git', 'c6eb379775c578a95dad8ddab53f963b80894850'),
            ('svn', '98')]),
        ('michał', 0, []),
        ('changed:tests/utils.py', 36, [
            ('hg', 'a00c1b6f5d7a6ae678fd553a8b81d92367f7ecf1')]),
        ('changed:vcs/utils/archivers.py', 11, [
            ('hg', '25213a5fbb048dff8ba65d21e466a835536e5b70'),
            ('hg', '47aedd538bf616eedcb0e7d630ea476df0e159c7'),
            ('hg', 'f5d23247fad4856a1dabd5838afade1e0eed24fb'),
            ('hg', '04ad456aefd6461aea24f90b63954b6b1ce07b3e'),
            ('git', 'c994f0de03b2a0aa848a04fc2c0d7e737dba31fc'),
            ('git', 'd1f898326327e20524fe22417c22d71064fe54a1'),
            ('git', 'fe568b4081755c12abf6ba673ba777fc02a415f3'),
            ('git', 'bafe786f0d8c2ff7da5c1dcfcfa577de0b5e92f1')]),
        ('added:README.rst', 3, [
            ('hg', '3803844fdbd3b711175fc3da9bdacfcd6d29a6fb'),
            ('git', 'ff7ca51e58c505fec0dd2491de52c622bb7a806b'),
            ('svn', '8')]),
        ('changed:lazy.py', 15, [
            ('hg', 'eaa291c5e6ae6126a203059de9854ccf7b5baa12'),
            ('git', '17438a11f72b93f56d0e08e7d1fa79a378578a82'),
            ('svn', '82'),
            ('svn', '262'),
            ('hg', 'f5d23247fad4856a1dabd5838afade1e0eed24fb'),
            ('git', '33fa3223355104431402a888fa77a4e9956feb3e')
        ]),
        ('author:marcin@python-blog.com '
         'commit_id:b986218ba1c9b0d6a259fac9b050b1724ed8e545', 1, [
            ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
        ('b986218ba1c9b0d6a259fac9b050b1724ed8e545', 1, [
            ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
        ('b986218b', 1, [
            ('hg', 'b986218ba1c9b0d6a259fac9b050b1724ed8e545')]),
    ])
    def test_search_commit_messages(
            self, query, expected_hits, expected_commits, enabled_backends):
        self.log_user()
        response = self.app.get(route_path('search'),
                                {'q': query, 'type': 'commit', 'page_limit': 500})

        response.mustcontain('%s results' % expected_hits)
        for backend, commit_id in expected_commits:
            if backend in enabled_backends:
                response.mustcontain(commit_id)

    @pytest.mark.parametrize("query, expected_hits, expected_paths", [
        ('readme.rst', 3, []),
        ('test*', 75, []),
        ('*model*', 1, []),
        ('extension:rst', 48, []),
        ('extension:rst api', 24, []),
    ])
    def test_search_file_paths(self, query, expected_hits, expected_paths):
        self.log_user()
        response = self.app.get(route_path('search'),
                                {'q': query, 'type': 'path', 'page_limit': 500})

        response.mustcontain('%s results' % expected_hits)
        for path in expected_paths:
            response.mustcontain(path)

    def test_search_commit_message_specific_repo(self, backend):
        self.log_user()
        response = self.app.get(
            route_path('search_repo', repo_name=backend.repo_name),
            {'q': 'bother to ask where to fetch repo during tests',
             'type': 'commit'})

        response.mustcontain('1 results')

    def test_filters_are_not_applied_for_admin_user(self):
        self.log_user()
        with mock.patch('whoosh.searching.Searcher.search') as search_mock:
            search_mock.return_value = mock.MagicMock(
                scored_length=lambda: 100,
                runtime=10
            )
            self.app.get(route_path('search'),
                         {'q': 'test query', 'type': 'commit'})
            assert search_mock.call_count == 1
            _, kwargs = search_mock.call_args
            assert kwargs['filter'] is None

    def test_filters_are_applied_for_normal_user(self, enabled_backends):
        self.log_user(TEST_USER_REGULAR_LOGIN, TEST_USER_REGULAR_PASS)
        with mock.patch('whoosh.searching.Searcher.search') as search_mock:
            search_mock.return_value = mock.MagicMock(
                scored_length=lambda: 100,
                runtime=10
            )
            self.app.get(route_path('search'),
                         {'q': 'test query', 'type': 'commit'})
            assert search_mock.call_count == 1
            _, kwargs = search_mock.call_args
            assert isinstance(kwargs['filter'], query.Or)
            expected_repositories = [
                f'vcs_test_{b}' for b in enabled_backends]
            queried_repositories = [
                name for type_, name in kwargs['filter'].all_terms()]
            for repository in expected_repositories:
                assert repository in queried_repositories
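The `search_mock.return_value = mock.MagicMock(scored_length=lambda: 100, runtime=10)` stubs added in the two filter tests above supply exactly the two members that `WhooshSearcher.search()` (second file below) reads from the Whoosh results object: it calls `whoosh_results.scored_length()` for the hit count and reads the `whoosh_results.runtime` attribute. A minimal stand-alone sketch of that stub contract; the values 100 and 10 are arbitrary placeholders:

import mock  # same external `mock` package the test module imports

# Stand-in for a whoosh results object: scored_length must be callable,
# runtime is read as a plain attribute.
fake_results = mock.MagicMock(scored_length=lambda: 100, runtime=10)

assert fake_results.scored_length() == 100  # becomes result['count']
assert fake_results.runtime == 10           # becomes result['runtime']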
@@ -1,311 +1,311 @@


# Copyright (C) 2012-2023 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

"""
Index schema for RhodeCode
"""


import os
import re
import logging

from whoosh import query as query_lib
from whoosh.highlight import HtmlFormatter, ContextFragmenter
from whoosh.index import create_in, open_dir, exists_in, EmptyIndexError
from whoosh.qparser import QueryParser, QueryParserError

import rhodecode.lib.helpers as h
from rhodecode.lib.index import BaseSearcher
from rhodecode.lib.str_utils import safe_str

log = logging.getLogger(__name__)


try:
    # we first try to import from rhodecode tools, fallback to copies if
    # we're unable to
    from rhodecode_tools.lib.fts_index.whoosh_schema import (
        ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
        COMMIT_SCHEMA)
except ImportError:
    log.warning('rhodecode_tools schema not available, doing a fallback '
                'import from `rhodecode.lib.index.whoosh_fallback_schema`')
    from rhodecode.lib.index.whoosh_fallback_schema import (
        ANALYZER, FILE_INDEX_NAME, FILE_SCHEMA, COMMIT_INDEX_NAME,
        COMMIT_SCHEMA)


FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
FRAGMENTER = ContextFragmenter(200)

log = logging.getLogger(__name__)


class WhooshSearcher(BaseSearcher):
    # this also shows in UI
    query_lang_doc = 'http://whoosh.readthedocs.io/en/latest/querylang.html'
    name = 'whoosh'

    def __init__(self, config):
        super(Searcher, self).__init__()
        self.config = config
        if not os.path.isdir(self.config['location']):
            os.makedirs(self.config['location'])

        opener = create_in
        if exists_in(self.config['location'], indexname=FILE_INDEX_NAME):
            opener = open_dir
        file_index = opener(self.config['location'], schema=FILE_SCHEMA,
                            indexname=FILE_INDEX_NAME)

        opener = create_in
        if exists_in(self.config['location'], indexname=COMMIT_INDEX_NAME):
            opener = open_dir
        changeset_index = opener(self.config['location'], schema=COMMIT_SCHEMA,
                                 indexname=COMMIT_INDEX_NAME)

        self.commit_schema = COMMIT_SCHEMA
        self.commit_index = changeset_index
        self.file_schema = FILE_SCHEMA
        self.file_index = file_index
        self.searcher = None

    def cleanup(self):
        if self.searcher:
            self.searcher.close()

    def _extend_query(self, query):
        hashes = re.compile('([0-9a-f]{5,40})').findall(query)
        if hashes:
            hashes_or_query = ' OR '.join('commit_id:%s*' % h for h in hashes)
            query = u'(%s) OR %s' % (query, hashes_or_query)
        return query

    def sort_def(self, search_type, direction, sort_field):

        if search_type == 'commit':
            field_defs = {
                'message': 'message',
                'date': 'date',
                'author_email': 'author',
            }
        elif search_type == 'path':
            field_defs = {
                'file': 'path',
                'size': 'size',
                'lines': 'lines',
            }
        elif search_type == 'content':
            # NOTE(dan): content doesn't support any sorting
            field_defs = {}
        else:
            return ''

        if sort_field in field_defs:
            return field_defs[sort_field]

    def search(self, query, document_type, search_user,
               repo_name=None, repo_group_name=None,
               requested_page=1, page_limit=10, sort=None, raise_on_exc=True):

        original_query = query
        query = self._extend_query(query)

        log.debug('QUERY: %s on %s', query, document_type)
        result = {
            'results': [],
            'count': 0,
            'error': None,
            'runtime': 0
        }
        search_type, index_name, schema_defn = self._prepare_for_search(
            document_type)
        self._init_searcher(index_name)
        try:
            qp = QueryParser(search_type, schema=schema_defn)
            allowed_repos_filter = self._get_repo_filter(
                search_user, repo_name)
            try:
                query = qp.parse(safe_str(query))
                log.debug('query: %s (%s)', query, repr(query))

                reverse, sorted_by = False, None
                direction, sort_field = self.get_sort(search_type, sort)
                if sort_field:
                    sort_definition = self.sort_def(search_type, direction, sort_field)
                    if sort_definition:
                        sorted_by = sort_definition
                        if direction == Searcher.DIRECTION_DESC:
                            reverse = True
                        if direction == Searcher.DIRECTION_ASC:
                            reverse = False

                whoosh_results = self.searcher.search(
                    query, filter=allowed_repos_filter, limit=None,
                    sortedby=sorted_by, reverse=reverse)

                # fixes for 32k limit that whoosh uses for highlight
                whoosh_results.fragmenter.charlimit = None
                res_ln = whoosh_results.scored_length()
                result['runtime'] = whoosh_results.runtime
                result['count'] = res_ln
                result['results'] = WhooshResultWrapper(
                    search_type, res_ln, whoosh_results)

            except QueryParserError:
                result['error'] = 'Invalid search query. Try quoting it.'
        except (EmptyIndexError, IOError, OSError):
            msg = 'There is no index to search in. Please run whoosh indexer'
            log.exception(msg)
            result['error'] = msg
        except Exception:
            msg = 'An error occurred during this search operation'
            log.exception(msg)
            result['error'] = msg

        return result

    def statistics(self, translator):
        _ = translator
        stats = [
            {'key': _('Index Type'), 'value': 'Whoosh'},
            {'sep': True},

            {'key': _('File Index'), 'value': str(self.file_index)},
            {'key': _('Indexed documents'), 'value': self.file_index.doc_count()},
            {'key': _('Last update'), 'value': h.time_to_datetime(self.file_index.last_modified())},

            {'sep': True},

            {'key': _('Commit index'), 'value': str(self.commit_index)},
            {'key': _('Indexed documents'), 'value': str(self.commit_index.doc_count())},
            {'key': _('Last update'), 'value': h.time_to_datetime(self.commit_index.last_modified())}
        ]
        return stats

    def _get_repo_filter(self, auth_user, repo_name):

        allowed_to_search = [
            repo for repo, perm in
            auth_user.permissions['repositories'].items()
            if perm != 'repository.none']

        if repo_name:
            repo_filter = [query_lib.Term('repository', repo_name)]

        elif 'hg.admin' in auth_user.permissions.get('global', []):
            return None

        else:
            repo_filter = [query_lib.Term('repository', _rn)
                           for _rn in allowed_to_search]
            # in case we're not allowed to search anywhere, it's a trick
            # to tell whoosh we're filtering, on ALL results
            repo_filter = repo_filter or [query_lib.Term('repository', '')]

        return query_lib.Or(repo_filter)

    def _prepare_for_search(self, cur_type):
        search_type = {
            'content': 'content',
            'commit': 'message',
            'path': 'path',
            'repository': 'repository'
        }.get(cur_type, 'content')

        index_name = {
            'content': FILE_INDEX_NAME,
            'commit': COMMIT_INDEX_NAME,
            'path': FILE_INDEX_NAME
        }.get(cur_type, FILE_INDEX_NAME)

        schema_defn = {
            'content': self.file_schema,
            'commit': self.commit_schema,
            'path': self.file_schema
        }.get(cur_type, self.file_schema)

        log.debug('IDX: %s', index_name)
        log.debug('SCHEMA: %s', schema_defn)
        return search_type, index_name, schema_defn

    def _init_searcher(self, index_name):
        idx = open_dir(self.config['location'], indexname=index_name)
        self.searcher = idx.searcher()
        return self.searcher


Searcher = WhooshSearcher


class WhooshResultWrapper(object):
    def __init__(self, search_type, total_hits, results):
        self.search_type = search_type
        self.results = results
        self.total_hits = total_hits

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, len(self))

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return self.total_hits

    def __iter__(self):
        """
        Allows iteration over results, and lazily generates content.

        *Requires* implementation of ``__getitem__`` method.
        """
        for hit in self.results:
            yield self.get_full_content(hit)

    def __getitem__(self, key):
        """
        Slicing of resultWrapper
        """
        i, j = key.start, key.stop
        for hit in self.results[i:j]:
            yield self.get_full_content(hit)

    def get_full_content(self, hit):
        # TODO: marcink: this feels like an overkill, there's a lot of data
        # inside hit object, and we don't need all
        res = dict(hit)
        # elastic search uses that, we set it empty so it falls back to regular HL logic
        res['content_highlight'] = ''

        f_path = ''  # pragma: no cover
        if self.search_type in ['content', 'path']:
            f_path = res['path'][len(res['repository']):]
            f_path = f_path.lstrip(os.sep)

        if self.search_type == 'content':
            res.update({'content_short_hl': hit.highlights('content'),
                        'f_path': f_path})
        elif self.search_type == 'path':
            res.update({'f_path': f_path})
        elif self.search_type == 'message':
            res.update({'message_hl': hit.highlights('message')})

        return res
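`_extend_query()` above is what lets the bare short hash `'b986218b'` in the first file's commit-message tests resolve to a full commit: any hex run of 5 to 40 characters in the query is OR-ed back in as a `commit_id:<hash>*` prefix term before parsing. A stand-alone illustration of the same rewrite (plain Python, no Whoosh needed; the sample query is taken from the test data above):

import re

def extend_query(query):
    # mirrors WhooshSearcher._extend_query: hex runs of 5-40 chars are also
    # searched as commit_id prefixes
    hashes = re.compile('([0-9a-f]{5,40})').findall(query)
    if hashes:
        hashes_or_query = ' OR '.join('commit_id:%s*' % h for h in hashes)
        query = '(%s) OR %s' % (query, hashes_or_query)
    return query

print(extend_query('b986218b'))
# prints: (b986218b) OR commit_id:b986218b*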