rhodecode-enterprise-ce Files · rhodecode/tests/lib/test_search_utils.py

configs: synced ce/ee configs

super-admin - - Load All Authors

File last commit:

r5088:8f6d1ed6 default


                r5540:bf8e8ca3

default

Download file

             test_search_utils.py
        
                    99 lines
            
             | 3.8 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / tests / lib / test_search_utils.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        dan
    
search: add support for elastic search 6...

              r3319
            
        super-admin
    
copyrights: updated for 2023

              r5088
            
      # Copyright (C) 2010-2023 RhodeCode GmbH

        dan
    
search: add support for elastic search 6...

              r3319
            
      #

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU Affero General Public License, version 3

      # (only), as published by the Free Software Foundation.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU Affero General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      #

      # This program is dual-licensed. If you wish to learn more about the

      # RhodeCode Enterprise Edition, including its added features, Support services,

      # and proprietary license terms, please see https://rhodecode.com/licenses/

      import copy

      import mock

      import pytest

      from rhodecode.lib.index import search_utils

      @pytest.mark.parametrize(
          'test_text, expected_output',
          [
              ('some text', ['some', 'text']),
              ('some    text', ['some', 'text']),
              ('some text "with  a phrase"', ['some', 'text', 'with  a phrase']),
              ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
              ('"justphrase"', ['justphrase']),
              ('""', []),
              ('', []),
              ('  ', []),
              ('"   "', []),
          ],
      )
      def test_extract_phrases(test_text, expected_output):
          """Check ``extract_phrases`` against bare words and quoted phrases.

          The cases expect bare words to come back one per item, a
          double-quoted phrase to come back as a single item with its inner
          spacing kept, and empty or whitespace-only input (quoted or not)
          to yield an empty list.
          """
          phrases = search_utils.extract_phrases(test_text)
          assert phrases == expected_output

      @pytest.mark.parametrize('test_text, text_phrases, expected_output', [
          ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
          ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
          # A phrase that does not occur in the text is expected to give no
          # offsets (this case used to be listed twice).
          ('irrelevant', ['not found'], []),
      ])
      def test_get_matching_phrase_offsets(test_text, text_phrases, expected_output):
          """Check the ``(start, end)`` offsets reported for phrases in a text.

          Per the cases above, ``end`` is exclusive, and a phrase is also
          matched inside a longer word ('here' within 'there').
          """
          offsets = search_utils.get_matching_phrase_offsets(
              test_text, text_phrases)
          assert offsets == expected_output

      @pytest.mark.parametrize('test_text, text_phrases, expected_output', [
          ('__RCSearchHLMarkBEG__some__RCSearchHLMarkEND__ text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(0, 46), (52, 98)]),
          ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ there', [], [(0, 46), (47, 93)]),
          ('some text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(10, 56)]),
          ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__there__RCSearchHLMarkEND__', [], [(0, 46), (47, 93), (94, 141)]),
          # Text without any highlight markers is expected to give no offsets
          # (this case used to be listed twice).
          ('irrelevant', ['not found'], []),
      ])
      def test_get_matching_marker_offsets(test_text, text_phrases, expected_output):
          """Check the offsets reported for highlight-marker regions in a text.

          Each expected ``(start, end)`` pair spans the whole marked region,
          including the BEG/END marker strings themselves.

          NOTE: ``text_phrases`` is part of the parametrization but is not
          passed to ``get_matching_markers_offsets``; only ``test_text`` is.
          """
          offsets = search_utils.get_matching_markers_offsets(test_text)
          assert offsets == expected_output

      def test_normalize_text_for_matching():
          """Check ``normalize_text_for_matching`` on a punctuation-heavy sample.

          For this sample the expected result is the lower-cased input with
          every punctuation character replaced by one space, so the length
          stays the same.
          """
          raw_text = 'OJjfe)*#$*@)$JF*)3r2f80h'
          expected = 'ojjfe        jf  3r2f80h'
          assert search_utils.normalize_text_for_matching(raw_text) == expected

      def test_get_matching_line_offsets():
          """Check per-line match offsets when searching by plain terms.

          The sample text has six lines, and 'text' occurs on lines 3 and 6.
          The expected mapping is keyed by 1-based line number and holds
          ``(start, end)`` offsets within that line.
          """
          filler = 'words words words'
          words = '\n'.join(
              [filler, filler, 'some text some', filler, filler, 'text here what'])

          result = search_utils.get_matching_line_offsets(words, terms='text')
          total_lines, matched_offsets = result

          assert total_lines == 6
          assert matched_offsets == {3: [(5, 9)], 6: [(0, 4)]}

      def test_get_matching_line_offsets_using_markers():
          """Check per-line match offsets when searching by marker patterns.

          No terms are given; matches come from the ``__1__...__2__`` marker
          pattern alone. The expected offsets cover the whole marked region,
          markers included, keyed by 1-based line number.
          """
          filler = 'words words words'
          words = '\n'.join([
              filler,
              filler,
              'some __1__text__2__ some',
              filler,
              filler,
              '__1__text__2__ here what',
          ])
          marker_patterns = ['__1__(.*?)__2__']

          result = search_utils.get_matching_line_offsets(
              words, terms=None, markers=marker_patterns)
          total_lines, matched_offsets = result

          assert total_lines == 6
          assert matched_offsets == {3: [(5, 19)], 6: [(0, 14)]}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

dan search: add support for elastic search 6...	r3319
super-admin copyrights: updated for 2023	r5088	# Copyright (C) 2010-2023 RhodeCode GmbH
dan search: add support for elastic search 6...	r3319	#
		# This program is free software: you can redistribute it and/or modify
		# it under the terms of the GNU Affero General Public License, version 3
		# (only), as published by the Free Software Foundation.
		#
		# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
		#
		# You should have received a copy of the GNU Affero General Public License
		# along with this program. If not, see <http://www.gnu.org/licenses/>.
		#
		# This program is dual-licensed. If you wish to learn more about the
		# RhodeCode Enterprise Edition, including its added features, Support services,
		# and proprietary license terms, please see https://rhodecode.com/licenses/

		import copy
		import mock
		import pytest

		from rhodecode.lib.index import search_utils


		@pytest.mark.parametrize('test_text, expected_output', [
		    ('some text', ['some', 'text']),
		    # Several spaces between words are expected to act as one separator.
		    ('some    text', ['some', 'text']),
		    # Spacing inside a quoted phrase is expected to be kept as-is.
		    ('some text "with  a phrase"', ['some', 'text', 'with  a phrase']),
		    ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
		    ('"justphrase"', ['justphrase']),
		    ('""', []),
		    ('', []),
		    ('  ', []),
		    ('"   "', []),
		])
		def test_extract_phrases(test_text, expected_output):
		    """Check ``extract_phrases`` against bare words and quoted phrases.

		    Empty or whitespace-only input, quoted or not, is expected to
		    yield an empty list.
		    """
		    assert search_utils.extract_phrases(test_text) == expected_output


		@pytest.mark.parametrize('test_text, text_phrases, expected_output', [
		    ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
		    ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
		    # A phrase that does not occur in the text is expected to give no
		    # offsets (this case used to be listed twice).
		    ('irrelevant', ['not found'], []),
		])
		def test_get_matching_phrase_offsets(test_text, text_phrases, expected_output):
		    """Check the ``(start, end)`` offsets reported for phrases in a text.

		    Per the cases above, ``end`` is exclusive, and a phrase is also
		    matched inside a longer word ('here' within 'there').
		    """
		    assert search_utils.get_matching_phrase_offsets(
		        test_text, text_phrases) == expected_output


		@pytest.mark.parametrize('test_text, text_phrases, expected_output', [
		    ('__RCSearchHLMarkBEG__some__RCSearchHLMarkEND__ text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(0, 46), (52, 98)]),
		    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ there', [], [(0, 46), (47, 93)]),
		    ('some text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(10, 56)]),
		    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__there__RCSearchHLMarkEND__', [], [(0, 46), (47, 93), (94, 141)]),
		    # Text without any highlight markers is expected to give no offsets
		    # (this case used to be listed twice).
		    ('irrelevant', ['not found'], []),
		])
		def test_get_matching_marker_offsets(test_text, text_phrases, expected_output):
		    """Check the offsets reported for highlight-marker regions in a text.

		    Each expected ``(start, end)`` pair spans the whole marked region,
		    including the BEG/END marker strings themselves.

		    NOTE: ``text_phrases`` is part of the parametrization but is not
		    passed to ``get_matching_markers_offsets``; only ``test_text`` is.
		    """
		    assert search_utils.get_matching_markers_offsets(test_text) == expected_output


		def test_normalize_text_for_matching():
		    """Check ``normalize_text_for_matching`` on a punctuation-heavy sample.

		    For this sample the expected result is the lower-cased input with
		    every punctuation character replaced by one space, so input and
		    expected output have the same length.
		    """
		    assert search_utils.normalize_text_for_matching(
		        'OJjfe)*#$*@)$JF*)3r2f80h') == 'ojjfe        jf  3r2f80h'


		def test_get_matching_line_offsets():
		    """Check per-line match offsets when searching by plain terms.

		    The sample text has six lines, and 'text' occurs on lines 3 and 6.
		    The expected mapping is keyed by 1-based line number and holds
		    ``(start, end)`` offsets within that line.
		    """
		    words = '\n'.join([
		        'words words words',
		        'words words words',
		        'some text some',
		        'words words words',
		        'words words words',
		        'text here what'
		    ])
		    total_lines, matched_offsets = \
		        search_utils.get_matching_line_offsets(words, terms='text')
		    assert total_lines == 6
		    assert matched_offsets == {3: [(5, 9)], 6: [(0, 4)]}


		def test_get_matching_line_offsets_using_markers():
		    """Check per-line match offsets when searching by marker patterns.

		    No terms are given; matches come from the ``__1__...__2__`` marker
		    pattern alone. The expected offsets cover the whole marked region,
		    markers included, keyed by 1-based line number.
		    """
		    words = '\n'.join([
		        'words words words',
		        'words words words',
		        'some __1__text__2__ some',
		        'words words words',
		        'words words words',
		        '__1__text__2__ here what'
		    ])
		    total_lines, matched_offsets = \
		        search_utils.get_matching_line_offsets(words, terms=None,
		                                               markers=['__1__(.*?)__2__'])
		    assert total_lines == 6
		    assert matched_offsets == {3: [(5, 19)], 6: [(0, 14)]}