##// END OF EJS Templates
# fold
# fold

# File last commit:

# r5088:8f6d1ed6 default
# r5236:673ed320 default
# Show More
# test_search_utils.py
# 99 lines | 3.8 KiB | text/x-python | PythonLexer
# Copyright (C) 2010-2023 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/
import copy
import mock
import pytest
from rhodecode.lib.index import search_utils
@pytest.mark.parametrize(
    'test_text, expected_output',
    [
        # bare words are expected back as individual items
        ('some text', ['some', 'text']),
        # NOTE(review): byte-identical to the case above as it appears here;
        # possibly meant to differ (e.g. in spacing) -- confirm upstream.
        ('some text', ['some', 'text']),
        # double-quoted spans are expected back as one phrase each
        ('some text "with a phrase"', ['some', 'text', 'with a phrase']),
        ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
        ('"justphrase"', ['justphrase']),
        # empty or blank input, quoted or not, is expected to give nothing
        ('""', []),
        ('', []),
        (' ', []),
        ('" "', []),
    ],
)
def test_extract_phrases(test_text, expected_output):
    """Check ``search_utils.extract_phrases`` against the expected list."""
    phrases = search_utils.extract_phrases(test_text)
    assert phrases == expected_output
@pytest.mark.parametrize(
    'test_text, text_phrases, expected_output',
    [
        ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
        # the third expected span, (11, 15), is the "here" inside "there"
        ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
        # a phrase that does not occur is expected to give no offsets
        ('irrelevant', ['not found'], []),
        # NOTE(review): verbatim repeat of the previous case -- confirm
        # whether it was meant to differ.
        ('irrelevant', ['not found'], []),
    ],
)
def test_get_matching_phrase_offsets(test_text, text_phrases, expected_output):
    """Check the ``(start, end)`` offsets reported for each phrase hit."""
    offsets = search_utils.get_matching_phrase_offsets(test_text, text_phrases)
    assert offsets == expected_output
@pytest.mark.parametrize(
    'test_text, text_phrases, expected_output',
    [
        (
            '__RCSearchHLMarkBEG__some__RCSearchHLMarkEND__ text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__',
            [],
            [(0, 46), (52, 98)],
        ),
        (
            '__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ there',
            [],
            [(0, 46), (47, 93)],
        ),
        (
            'some text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__',
            [],
            [(10, 56)],
        ),
        (
            '__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__there__RCSearchHLMarkEND__',
            [],
            [(0, 46), (47, 93), (94, 141)],
        ),
        # text without any marker pair is expected to give no offsets
        ('irrelevant', ['not found'], []),
        # NOTE(review): verbatim repeat of the previous case -- confirm
        # whether it was meant to differ.
        ('irrelevant', ['not found'], []),
    ],
)
def test_get_matching_marker_offsets(test_text, text_phrases, expected_output):
    """Check the offsets reported for highlight-marker delimited spans.

    The expected spans cover the markers themselves, not only the text
    between them. ``text_phrases`` is supplied by the parametrization but
    is not passed to the function under test.
    """
    offsets = search_utils.get_matching_markers_offsets(test_text)
    assert offsets == expected_output
def test_normalize_text_for_matching():
    """Check normalization of a mixed-case string with punctuation runs.

    The expected result is lower-cased, with each run of punctuation
    replaced by a single space and no trailing space.
    """
    raw_text = 'OJjfe)*#$*@)$JF*)3r2f80h'
    normalized = search_utils.normalize_text_for_matching(raw_text)
    assert normalized == 'ojjfe jf 3r2f80h'
def test_get_matching_line_offsets():
    """Check per-line offsets when matching by a plain search term.

    The expected mapping is keyed by 1-based line number: "text" occurs on
    the third and sixth lines of the fixture.
    """
    fixture_lines = (
        'words words words',
        'words words words',
        'some text some',
        'words words words',
        'words words words',
        'text here what',
    )
    content = '\n'.join(fixture_lines)

    line_count, offsets_by_line = search_utils.get_matching_line_offsets(
        content, terms='text')

    assert line_count == 6
    assert offsets_by_line == {3: [(5, 9)], 6: [(0, 4)]}
def test_get_matching_line_offsets_using_markers():
    """Check per-line offsets when matching by marker patterns, not terms.

    ``terms`` is passed as ``None`` and a single marker regex is given.
    The expected spans include the ``__1__`` / ``__2__`` delimiters, and
    the mapping is keyed by 1-based line number.
    """
    fixture_lines = (
        'words words words',
        'words words words',
        'some __1__text__2__ some',
        'words words words',
        'words words words',
        '__1__text__2__ here what',
    )
    content = '\n'.join(fixture_lines)
    marker_patterns = ['__1__(.*?)__2__']

    line_count, offsets_by_line = search_utils.get_matching_line_offsets(
        content, terms=None, markers=marker_patterns)

    assert line_count == 6
    assert offsets_by_line == {3: [(5, 19)], 6: [(0, 14)]}