##// END OF EJS Templates
search: add support for elastic search 6...
dan -
r3319:b8fd1d7a default
parent child Browse files
Show More
@@ -0,0 +1,257 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2012-2018 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 import re
21
22 import pygments.filter
23 import pygments.filters
24 from pygments.token import Comment
25
# Sentinel strings wrapped around each search match by the ElasticSearch
# highlighter; chosen so they survive lexing intact and can be converted
# to highlight tokens afterwards (see ElasticSearchHLFilter).
HL_BEG_MARKER = '__RCSearchHLMarkBEG__'
HL_END_MARKER = '__RCSearchHLMarkEND__'
# regex matching one highlighted fragment, markers included (non-greedy)
HL_MARKER_RE = '{}(.*?){}'.format(HL_BEG_MARKER, HL_END_MARKER)
29
30
class ElasticSearchHLFilter(pygments.filters.Filter):
    """
    Pygments stream filter that converts the RhodeCode/ElasticSearch
    highlight markers embedded in token values into dedicated
    ``Comment.ElasticMatch`` tokens, so matched fragments can be styled.
    """
    _names = [HL_BEG_MARKER, HL_END_MARKER]

    def __init__(self, **options):
        pygments.filters.Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        beg_marker, end_marker = self._names
        # split pattern keeps the markers themselves as separate pieces
        split_pattern = '({}|{})'.format(beg_marker, end_marker)

        inside_match = False
        for token_type, token_value in stream:
            # fast path: no marker in this token, pass it through
            # (re-typed if we are currently inside a highlighted span)
            if beg_marker not in token_value and end_marker not in token_value:
                if inside_match:
                    yield Comment.ElasticMatch, token_value
                else:
                    yield token_type, token_value
                continue

            # marker(s) present: split, toggle state on markers,
            # and emit the remaining pieces with the proper token type
            for piece in re.split(split_pattern, token_value):
                if not piece:
                    continue
                if piece == beg_marker:
                    inside_match = True
                elif piece == end_marker:
                    inside_match = False
                elif inside_match:
                    yield Comment.ElasticMatch, piece
                else:
                    yield token_type, piece
66
67
def extract_phrases(text_query):
    """
    Split a search term string into phrases, keeping anything enclosed
    in double quotes together as a single phrase and discarding empty or
    whitespace-only tokens. eg.

    'some text "a phrase" more' => ['some', 'text', 'a phrase', 'more']

    """
    # splitting on '"' alternates between unquoted (even index) and
    # quoted (odd index) segments; an unterminated quote leaves the tail
    # as a quoted segment, matching the char-by-char parser it replaces
    collected = []
    for idx, segment in enumerate(text_query.split('"')):
        if idx % 2:
            # inside quotes: whole segment is one phrase
            collected.append(segment)
        else:
            # outside quotes: break on single spaces
            collected.extend(segment.split(' '))

    return [token.strip() for token in collected if token.strip()]
107
108
def get_matching_phrase_offsets(text, phrases):
    """
    Return a list of ``(start, end)`` string offsets in `text` where the
    given `phrases` occur. Phrases are matched literally — regex
    metacharacters in a phrase are escaped (previously a phrase like
    ``a.c`` was silently treated as a regex). Offsets are grouped per
    phrase, in the order the phrases are given.

    >>> get_matching_phrase_offsets('some text here', ['some', 'here'])
    [(0, 4), (10, 14)]

    """
    offsets = []

    for phrase in (phrases or []):
        if not phrase:
            # an empty phrase would "match" at every position
            continue
        for match in re.finditer(re.escape(phrase), text):
            offsets.append((match.start(), match.end()))

    return offsets
125
126
def get_matching_markers_offsets(text, markers=None):
    r"""
    Return a list of ``(start, end)`` string offsets in `text` covering
    every span matched by one of the `markers` regexes, marker text
    included. Defaults to the ElasticSearch highlight marker pattern.

    >>> get_matching_markers_offsets('$1some$2 text $1here$2 marked', ['\$1(.*?)\$2'])
    [(0, 8), (14, 22)]

    """
    # NOTE: the original doctest claimed [(0, 5), (16, 22)], which is
    # wrong: '$1some$2' spans offsets 0-8 and '$1here$2' spans 14-22.
    offsets = []

    for mark in (markers or [HL_MARKER_RE]):
        for match in re.finditer(mark, text):
            offsets.append((match.start(), match.end()))

    return offsets
144
145
def normalize_text_for_matching(x):
    r"""
    Lower-case ``x`` and replace each character outside ``\w``
    (punctuation, whitespace, etc.) with a single space, so two strings
    can be compared while ignoring punctuation. Underscores and digits
    are kept.
    """
    # \W is the complement of \w, identical to the former [^\w] pattern
    return re.sub(r'\W', ' ', x.lower())
152
153
def get_matching_line_offsets(lines, terms=None, markers=None):
    """ Return a set of `lines` indices (starting from 1) matching a
    text search query, along with `context` lines above/below matching lines

    :param lines: list of strings representing lines
    :param terms: search term string to match in lines eg. 'some text'
    :param markers: instead of terms, use highlight markers instead that
        mark beginning and end for matched item. eg. ['START(.*?)END']

    eg.

    text = '''
    words words words
    words words words
    some text some
    words words words
    words words words
    text here what
    '''
    get_matching_line_offsets(text, 'text', context=1)
    6, {3: [(5, 9)], 6: [(0, 4)]]

    """
    # pick the per-line matcher up front, then run a single scan loop
    if terms:
        phrases = [normalize_text_for_matching(phrase)
                   for phrase in extract_phrases(terms)]

        def find_offsets(line):
            # punctuation-insensitive phrase matching
            return get_matching_phrase_offsets(
                normalize_text_for_matching(line), phrases)
    else:
        patterns = markers or [HL_MARKER_RE]

        def find_offsets(line):
            return get_matching_markers_offsets(line, markers=patterns)

    matching_lines = {}
    current_line = 0
    for current_line, line in enumerate(lines.splitlines(), start=1):
        offsets = find_offsets(line)
        if offsets:
            matching_lines[current_line] = offsets

    # current_line is the total number of lines scanned (0 if empty)
    return current_line, matching_lines
198
199
def lucene_query_parser():
    """
    Build and return a pyparsing grammar for (a subset of) the Lucene
    query syntax: field searches, quoted phrases, fuzzy and proximity
    modifiers, boosts, inclusive/exclusive ranges and AND/OR/NOT
    boolean combinators.

    Adapted from the ``lucene_grammar`` example shipped with pyparsing.
    """
    # from pyparsing lucene_grammar
    from pyparsing import (
        Literal, CaselessKeyword, Forward, Regex, QuotedString, Suppress,
        Optional, Group, infixNotation, opAssoc, ParserElement, pyparsing_common)

    # packrat memoization speeds up this heavily recursive grammar
    ParserElement.enablePackrat()

    COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, ":[]{}~^")
    LPAR, RPAR = map(Suppress, "()")
    and_, or_, not_, to_ = map(CaselessKeyword, "AND OR NOT TO".split())
    keyword = and_ | or_ | not_ | to_

    # forward declaration: the grammar is recursive (terms may contain
    # parenthesized sub-expressions)
    expression = Forward()

    # a bare word: alphanumerics plus Lucene wildcard/escape characters
    valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word")
    # unescape backslash-escaped specials; chr(127) is a temporary
    # placeholder so a literal '\\' survives the single-backslash strip
    valid_word.setParseAction(
        lambda t: t[0]
        .replace('\\\\', chr(127))
        .replace('\\', '')
        .replace(chr(127), '\\')
    )

    string = QuotedString('"')

    # term prefixes: '+term' (must appear), '-term' (must not appear)
    required_modifier = Literal("+")("required")
    prohibit_modifier = Literal("-")("prohibit")
    integer = Regex(r"\d+").setParseAction(lambda t: int(t[0]))
    # '"phrase"~N' proximity search, 'word~0.5' fuzzy search
    proximity_modifier = Group(TILDE + integer("proximity"))
    number = pyparsing_common.fnumber()
    fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

    term = Forward()
    field_name = valid_word().setName("fieldname")
    # '[a TO b]' inclusive / '{a TO b}' exclusive range searches
    incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
    excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
    range_search = incl_range_search("incl_range") | excl_range_search("excl_range")
    # 'term^2' relevance boost
    boost = (CARAT + number("boost"))

    string_expr = Group(string + proximity_modifier) | string
    word_expr = Group(valid_word + fuzzy_modifier) | valid_word
    # a term: optional 'field:' prefix, then a word/phrase/range/group,
    # then an optional boost
    term << (Optional(field_name("field") + COLON) +
             (word_expr | string_expr | range_search | Group(
                 LPAR + expression + RPAR)) +
             Optional(boost))
    # keep grouping only when a field or boost was parsed
    term.setParseAction(lambda t: [t] if 'field' in t or 'boost' in t else None)

    # boolean operators in precedence order; a bare space between terms
    # (the Optional or_) is treated as OR
    expression << infixNotation(
        term,
        [
            (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT),
            ((not_ | '!').setParseAction(lambda: "NOT"), 1, opAssoc.RIGHT),
            ((and_ | '&&').setParseAction(lambda: "AND"), 2, opAssoc.LEFT),
            (Optional(or_ | '||').setParseAction(lambda: "OR"), 2, opAssoc.LEFT),
        ]
    )

    return expression
@@ -0,0 +1,100 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2010-2018 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
21 import copy
22 import mock
23 import pytest
24
25 from rhodecode.lib.index import search_utils
26
27
# tokenisation behaviour of search_utils.extract_phrases:
# whitespace-separated words, double-quoted phrases kept together,
# empty / whitespace-only tokens discarded
@pytest.mark.parametrize('test_text, expected_output', [
    ('some text', ['some', 'text']),
    ('some text', ['some', 'text']),
    ('some text "with a phrase"', ['some', 'text', 'with a phrase']),
    ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
    ('"justphrase"', ['justphrase']),
    ('""', []),
    ('', []),
    (' ', []),
    ('" "', []),
])
def test_extract_phrases(test_text, expected_output):
    assert search_utils.extract_phrases(test_text) == expected_output
41
42
# offsets are (start, end) pairs, grouped per phrase, repeated matches
# of the same phrase reported in positional order
@pytest.mark.parametrize('test_text, text_phrases, expected_output', [
    ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
    ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
    ('irrelevant', ['not found'], []),
    ('irrelevant', ['not found'], []),
])
def test_get_matching_phrase_offsets(test_text, text_phrases, expected_output):
    assert search_utils.get_matching_phrase_offsets(
        test_text, text_phrases) == expected_output
52
53
# marker-based offset extraction; an empty marker list falls back to the
# default RhodeCode highlight-marker pattern
@pytest.mark.parametrize('test_text, text_markers, expected_output', [
    ('__RCSearchHLMarkBEG__some__RCSearchHLMarkEND__ text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(0, 46), (52, 98)]),
    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ there', [], [(0, 46), (47, 93)]),
    ('some text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(10, 56)]),
    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__there__RCSearchHLMarkEND__', [], [(0, 46), (47, 93), (94, 141)]),
    ('irrelevant', ['not found'], []),
    ('irrelevant', ['not found'], []),
])
def test_get_matching_marker_offsets(test_text, text_markers, expected_output):
    # pass the parametrized markers through to the function under test;
    # previously the second parameter was silently ignored, so the
    # 'not found' cases only passed by coincidence
    assert search_utils.get_matching_markers_offsets(
        test_text, markers=text_markers or None) == expected_output
65
66
def test_normalize_text_for_matching():
    # each non-alphanumeric character becomes exactly one space — runs of
    # punctuation turn into runs of spaces (they are NOT collapsed), so
    # build the expected value explicitly instead of embedding literal
    # space runs that are easy to mangle
    expected = 'ojjfe' + ' ' * 8 + 'jf' + ' ' * 2 + '3r2f80h'
    assert search_utils.normalize_text_for_matching(
        'OJjfe)*#$*@)$JF*)3r2f80h') == expected
70
71
def test_get_matching_line_offsets():
    # term-based search: returns (total line count, {1-based line number:
    # [(start, end) offsets of each match within that line]})
    words = '\n'.join([
        'words words words',
        'words words words',
        'some text some',
        'words words words',
        'words words words',
        'text here what'
    ])
    total_lines, matched_offsets = \
        search_utils.get_matching_line_offsets(words, terms='text')
    assert total_lines == 6
    assert matched_offsets == {3: [(5, 9)], 6: [(0, 4)]}
85
86
def test_get_matching_line_offsets_using_markers():
    # marker-based search (terms=None): offsets cover the whole marked
    # span, marker text included
    words = '\n'.join([
        'words words words',
        'words words words',
        'some __1__text__2__ some',
        'words words words',
        'words words words',
        '__1__text__2__ here what'
    ])
    total_lines, matched_offsets = \
        search_utils.get_matching_line_offsets(words, terms=None,
                                               markers=['__1__(.*?)__2__'])
    assert total_lines == 6
    assert matched_offsets == {3: [(5, 19)], 6: [(0, 14)]}
@@ -407,30 +407,75 b' self: super: {'
407 };
407 };
408 };
408 };
409 "elasticsearch" = super.buildPythonPackage {
409 "elasticsearch" = super.buildPythonPackage {
410 name = "elasticsearch-2.3.0";
410 name = "elasticsearch-6.3.1";
411 doCheck = false;
411 doCheck = false;
412 propagatedBuildInputs = [
412 propagatedBuildInputs = [
413 self."urllib3"
413 self."urllib3"
414 ];
414 ];
415 src = fetchurl {
415 src = fetchurl {
416 url = "https://files.pythonhosted.org/packages/10/35/5fd52c5f0b0ee405ed4b5195e8bce44c5e041787680dc7b94b8071cac600/elasticsearch-2.3.0.tar.gz";
416 url = "https://files.pythonhosted.org/packages/9d/ce/c4664e8380e379a9402ecfbaf158e56396da90d520daba21cfa840e0eb71/elasticsearch-6.3.1.tar.gz";
417 sha256 = "10ad2dk73xsys9vajwsncibs69asa63w1hgwz6lz1prjpyi80c5y";
417 sha256 = "12y93v0yn7a4xmf969239g8gb3l4cdkclfpbk1qc8hx5qkymrnma";
418 };
418 };
419 meta = {
419 meta = {
420 license = [ pkgs.lib.licenses.asl20 ];
420 license = [ pkgs.lib.licenses.asl20 ];
421 };
421 };
422 };
422 };
423 "elasticsearch-dsl" = super.buildPythonPackage {
423 "elasticsearch-dsl" = super.buildPythonPackage {
424 name = "elasticsearch-dsl-2.2.0";
424 name = "elasticsearch-dsl-6.3.1";
425 doCheck = false;
425 doCheck = false;
426 propagatedBuildInputs = [
426 propagatedBuildInputs = [
427 self."six"
427 self."six"
428 self."python-dateutil"
428 self."python-dateutil"
429 self."elasticsearch"
429 self."elasticsearch"
430 self."ipaddress"
431 ];
432 src = fetchurl {
433 url = "https://files.pythonhosted.org/packages/4c/0d/1549f50c591db6bb4e66cbcc8d34a6e537c3d89aa426b167c244fd46420a/elasticsearch-dsl-6.3.1.tar.gz";
434 sha256 = "1gh8a0shqi105k325hgwb9avrpdjh0mc6mxwfg9ba7g6lssb702z";
435 };
436 meta = {
437 license = [ pkgs.lib.licenses.asl20 ];
438 };
439 };
440 "elasticsearch1" = super.buildPythonPackage {
441 name = "elasticsearch1-1.10.0";
442 doCheck = false;
443 propagatedBuildInputs = [
444 self."urllib3"
430 ];
445 ];
431 src = fetchurl {
446 src = fetchurl {
432 url = "https://files.pythonhosted.org/packages/66/2f/52a086968788e58461641570f45c3207a52d46ebbe9b77dc22b6a8ffda66/elasticsearch-dsl-2.2.0.tar.gz";
447 url = "https://files.pythonhosted.org/packages/a6/eb/73e75f9681fa71e3157b8ee878534235d57f24ee64f0e77f8d995fb57076/elasticsearch1-1.10.0.tar.gz";
433 sha256 = "1g4kxzxsdwlsl2a9kscmx11pafgimhj7y8wrfksv8pgvpkfb9fwr";
448 sha256 = "0g89444kd5zwql4vbvyrmi2m6l6dcj6ga98j4hqxyyyz6z20aki2";
449 };
450 meta = {
451 license = [ pkgs.lib.licenses.asl20 ];
452 };
453 };
454 "elasticsearch1-dsl" = super.buildPythonPackage {
455 name = "elasticsearch1-dsl-0.0.12";
456 doCheck = false;
457 propagatedBuildInputs = [
458 self."six"
459 self."python-dateutil"
460 self."elasticsearch1"
461 ];
462 src = fetchurl {
463 url = "https://files.pythonhosted.org/packages/eb/9d/785342775cb10eddc9b8d7457d618a423b4f0b89d8b2b2d1bc27190d71db/elasticsearch1-dsl-0.0.12.tar.gz";
464 sha256 = "0ig1ly39v93hba0z975wnhbmzwj28w6w1sqlr2g7cn5spp732bhk";
465 };
466 meta = {
467 license = [ pkgs.lib.licenses.asl20 ];
468 };
469 };
470 "elasticsearch2" = super.buildPythonPackage {
471 name = "elasticsearch2-2.5.0";
472 doCheck = false;
473 propagatedBuildInputs = [
474 self."urllib3"
475 ];
476 src = fetchurl {
477 url = "https://files.pythonhosted.org/packages/84/77/63cf63d4ba11d913b5278406f2a37b0712bec6fc85edfb6151a33eaeba25/elasticsearch2-2.5.0.tar.gz";
478 sha256 = "0ky0q16lbvz022yv6q3pix7aamf026p1y994537ccjf0p0dxnbxr";
434 };
479 };
435 meta = {
480 meta = {
436 license = [ pkgs.lib.licenses.asl20 ];
481 license = [ pkgs.lib.licenses.asl20 ];
@@ -818,11 +863,11 b' self: super: {'
818 };
863 };
819 };
864 };
820 "markupsafe" = super.buildPythonPackage {
865 "markupsafe" = super.buildPythonPackage {
821 name = "markupsafe-1.0";
866 name = "markupsafe-1.1.0";
822 doCheck = false;
867 doCheck = false;
823 src = fetchurl {
868 src = fetchurl {
824 url = "https://files.pythonhosted.org/packages/4d/de/32d741db316d8fdb7680822dd37001ef7a448255de9699ab4bfcbdf4172b/MarkupSafe-1.0.tar.gz";
869 url = "https://files.pythonhosted.org/packages/ac/7e/1b4c2e05809a4414ebce0892fe1e32c14ace86ca7d50c70f00979ca9b3a3/MarkupSafe-1.1.0.tar.gz";
825 sha256 = "0rdn1s8x9ni7ss8rfiacj7x1085lx8mh2zdwqslnw8xc3l4nkgm6";
870 sha256 = "1lxirjypbdd3l9jl4vliilhfnhy7c7f2vlldqg1b0i74khn375sf";
826 };
871 };
827 meta = {
872 meta = {
828 license = [ pkgs.lib.licenses.bsdOriginal ];
873 license = [ pkgs.lib.licenses.bsdOriginal ];
@@ -1271,11 +1316,11 b' self: super: {'
1271 };
1316 };
1272 };
1317 };
1273 "pyparsing" = super.buildPythonPackage {
1318 "pyparsing" = super.buildPythonPackage {
1274 name = "pyparsing-1.5.7";
1319 name = "pyparsing-2.3.0";
1275 doCheck = false;
1320 doCheck = false;
1276 src = fetchurl {
1321 src = fetchurl {
1277 url = "https://files.pythonhosted.org/packages/6f/2c/47457771c02a8ff0f302b695e094ec309e30452232bd79198ee94fda689f/pyparsing-1.5.7.tar.gz";
1322 url = "https://files.pythonhosted.org/packages/d0/09/3e6a5eeb6e04467b737d55f8bba15247ac0876f98fae659e58cd744430c6/pyparsing-2.3.0.tar.gz";
1278 sha256 = "17z7ws076z977sclj628fvwrp8y9j2rvdjcsq42v129n1gwi8vk4";
1323 sha256 = "14k5v7n3xqw8kzf42x06bzp184spnlkya2dpjyflax6l3yrallzk";
1279 };
1324 };
1280 meta = {
1325 meta = {
1281 license = [ pkgs.lib.licenses.mit ];
1326 license = [ pkgs.lib.licenses.mit ];
@@ -1642,7 +1687,7 b' self: super: {'
1642 };
1687 };
1643 };
1688 };
1644 "rhodecode-enterprise-ce" = super.buildPythonPackage {
1689 "rhodecode-enterprise-ce" = super.buildPythonPackage {
1645 name = "rhodecode-enterprise-ce-4.15.0";
1690 name = "rhodecode-enterprise-ce-4.16.0";
1646 buildInputs = [
1691 buildInputs = [
1647 self."pytest"
1692 self."pytest"
1648 self."py"
1693 self."py"
@@ -1788,7 +1833,7 b' self: super: {'
1788 };
1833 };
1789 };
1834 };
1790 "rhodecode-tools" = super.buildPythonPackage {
1835 "rhodecode-tools" = super.buildPythonPackage {
1791 name = "rhodecode-tools-1.0.1";
1836 name = "rhodecode-tools-1.1.0";
1792 doCheck = false;
1837 doCheck = false;
1793 propagatedBuildInputs = [
1838 propagatedBuildInputs = [
1794 self."click"
1839 self."click"
@@ -1797,14 +1842,16 b' self: super: {'
1797 self."mako"
1842 self."mako"
1798 self."markupsafe"
1843 self."markupsafe"
1799 self."requests"
1844 self."requests"
1800 self."elasticsearch"
1801 self."elasticsearch-dsl"
1802 self."urllib3"
1845 self."urllib3"
1803 self."whoosh"
1846 self."whoosh"
1847 self."elasticsearch"
1848 self."elasticsearch-dsl"
1849 self."elasticsearch2"
1850 self."elasticsearch1-dsl"
1804 ];
1851 ];
1805 src = fetchurl {
1852 src = fetchurl {
1806 url = "https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.0.1.tar.gz?md5=ffb5d6bcb855305b93cfe23ad42e500b";
1853 url = "https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.1.0.tar.gz?md5=cc320c277cb2add546220290ac9be626";
1807 sha256 = "0nr300s4sg685qs4wgbwlplwriawrwi6jq79z37frcnpyc89gpvm";
1854 sha256 = "1wbnnfrzyp0d4ys55vj5vnfrzfhwlqgdhc8yv8i6kwinizf8hfrn";
1808 };
1855 };
1809 meta = {
1856 meta = {
1810 license = [ { fullName = "Apache 2.0 and Proprietary"; } ];
1857 license = [ { fullName = "Apache 2.0 and Proprietary"; } ];
@@ -1848,11 +1895,11 b' self: super: {'
1848 };
1895 };
1849 };
1896 };
1850 "setuptools" = super.buildPythonPackage {
1897 "setuptools" = super.buildPythonPackage {
1851 name = "setuptools-40.6.2";
1898 name = "setuptools-40.6.3";
1852 doCheck = false;
1899 doCheck = false;
1853 src = fetchurl {
1900 src = fetchurl {
1854 url = "https://files.pythonhosted.org/packages/b0/d1/8acb42f391cba52e35b131e442e80deffbb8d0676b93261d761b1f0ef8fb/setuptools-40.6.2.zip";
1901 url = "https://files.pythonhosted.org/packages/37/1b/b25507861991beeade31473868463dad0e58b1978c209de27384ae541b0b/setuptools-40.6.3.zip";
1855 sha256 = "0r2c5hapirlzm34h7pl1lgkm6gk7bcrlrdj28qgsvaqg3f74vfw6";
1902 sha256 = "1y085dnk574sxw9aymdng9gijvrsbw86hsv9hqnhv7y4d6nlsirv";
1856 };
1903 };
1857 meta = {
1904 meta = {
1858 license = [ pkgs.lib.licenses.mit ];
1905 license = [ pkgs.lib.licenses.mit ];
@@ -2043,11 +2090,11 b' self: super: {'
2043 };
2090 };
2044 };
2091 };
2045 "urllib3" = super.buildPythonPackage {
2092 "urllib3" = super.buildPythonPackage {
2046 name = "urllib3-1.21";
2093 name = "urllib3-1.24.1";
2047 doCheck = false;
2094 doCheck = false;
2048 src = fetchurl {
2095 src = fetchurl {
2049 url = "https://files.pythonhosted.org/packages/34/95/7b28259d0006ed681c424cd71a668363265eac92b67dddd018eb9a22bff8/urllib3-1.21.tar.gz";
2096 url = "https://files.pythonhosted.org/packages/b1/53/37d82ab391393565f2f831b8eedbffd57db5a718216f82f1a8b4d381a1c1/urllib3-1.24.1.tar.gz";
2050 sha256 = "0irnj4wvh2y36s4q3l2vas9qr9m766w6w418nb490j3mf8a8zw6h";
2097 sha256 = "08lwd9f3hqznyf32vnzwvp87pchx062nkbgyrf67rwlkgj0jk5fy";
2051 };
2098 };
2052 meta = {
2099 meta = {
2053 license = [ pkgs.lib.licenses.mit ];
2100 license = [ pkgs.lib.licenses.mit ];
@@ -36,7 +36,7 b' kombu==4.2.0'
36 lxml==4.2.5
36 lxml==4.2.5
37 mako==1.0.7
37 mako==1.0.7
38 markdown==2.6.11
38 markdown==2.6.11
39 markupsafe==1.0.0
39 markupsafe==1.1.0
40 msgpack-python==0.5.6
40 msgpack-python==0.5.6
41 pyotp==2.2.7
41 pyotp==2.2.7
42 packaging==15.2
42 packaging==15.2
@@ -51,7 +51,7 b' pycrypto==2.6.1'
51 pycurl==7.43.0.2
51 pycurl==7.43.0.2
52 pyflakes==0.8.1
52 pyflakes==0.8.1
53 pygments==2.3.0
53 pygments==2.3.0
54 pyparsing==1.5.7
54 pyparsing==2.3.0
55 pyramid-beaker==0.8
55 pyramid-beaker==0.8
56 pyramid-debugtoolbar==4.4.0
56 pyramid-debugtoolbar==4.4.0
57 pyramid-jinja2==2.7
57 pyramid-jinja2==2.7
@@ -79,7 +79,7 b' subprocess32==3.5.2'
79 supervisor==3.3.4
79 supervisor==3.3.4
80 tempita==0.5.2
80 tempita==0.5.2
81 translationstring==1.3
81 translationstring==1.3
82 urllib3==1.21
82 urllib3==1.24.1
83 urlobject==2.4.3
83 urlobject==2.4.3
84 venusian==1.1.0
84 venusian==1.1.0
85 weberror==0.10.3
85 weberror==0.10.3
@@ -123,7 +123,7 b' ipdb==0.11.0'
123 ipython==5.1.0
123 ipython==5.1.0
124
124
125 ## rhodecode-tools, special case
125 ## rhodecode-tools, special case
126 https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.0.1.tar.gz?md5=ffb5d6bcb855305b93cfe23ad42e500b#egg=rhodecode-tools==1.0.1
126 https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.1.0.tar.gz?md5=cc320c277cb2add546220290ac9be626#egg=rhodecode-tools==1.1.0
127
127
128 ## appenlight
128 ## appenlight
129 appenlight-client==0.6.26
129 appenlight-client==0.6.26
@@ -666,8 +666,8 b' class AdminSettingsView(BaseAppView):'
666 c = self.load_default_context()
666 c = self.load_default_context()
667 c.active = 'search'
667 c.active = 'search'
668
668
669 searcher = searcher_from_config(self.request.registry.settings)
669 c.searcher = searcher_from_config(self.request.registry.settings)
670 c.statistics = searcher.statistics(self.request.translate)
670 c.statistics = c.searcher.statistics(self.request.translate)
671
671
672 return self._get_template_context(c)
672 return self._get_template_context(c)
673
673
@@ -246,9 +246,9 b' class HomeView(BaseAppView):'
246 }
246 }
247 for obj in acl_iter]
247 for obj in acl_iter]
248
248
249 def _get_hash_commit_list(self, auth_user, query):
249 def _get_hash_commit_list(self, auth_user, searcher, query):
250 org_query = query
250 org_query = query
251 if not query or len(query) < 3:
251 if not query or len(query) < 3 or not searcher:
252 return []
252 return []
253
253
254 commit_hashes = re.compile('(?:commit:)([0-9a-f]{2,40})').findall(query)
254 commit_hashes = re.compile('(?:commit:)([0-9a-f]{2,40})').findall(query)
@@ -257,9 +257,8 b' class HomeView(BaseAppView):'
257 return []
257 return []
258 commit_hash = commit_hashes[0]
258 commit_hash = commit_hashes[0]
259
259
260 searcher = searcher_from_config(self.request.registry.settings)
261 result = searcher.search(
260 result = searcher.search(
262 'commit_id:%s*' % commit_hash, 'commit', auth_user,
261 'commit_id:{}*'.format(commit_hash), 'commit', auth_user,
263 raise_on_exc=False)
262 raise_on_exc=False)
264
263
265 return [
264 return [
@@ -303,6 +302,84 b' class HomeView(BaseAppView):'
303 }
302 }
304 return data
303 return data
305
304
    def _get_default_search_queries(self, search_context, searcher, query):
        """
        Build the list of "suggested search" entries shown in the goto
        switcher for `query`, scoped to the current repository or
        repository-group context when the search backend supports scoped
        queries (ElasticSearch 6).

        :param search_context: GET params carrying ``search_context[...]`` keys
        :param searcher: searcher from ``searcher_from_config``; may be None
            when no search backend is configured
        :param query: raw user query string
        :return: list of suggestion dicts (id, value, value_display, type, url)
        """
        if not searcher:
            return []
        is_es_6 = searcher.is_es_6

        queries = []
        repo_group_name, repo_name, repo_context = None, None, None

        # repo group context
        if search_context.get('search_context[repo_group_name]'):
            repo_group_name = search_context.get('search_context[repo_group_name]')
        if search_context.get('search_context[repo_name]'):
            repo_name = search_context.get('search_context[repo_name]')
            repo_context = search_context.get('search_context[repo_view_type]')

        # repo context wins over repo-group context when both are present
        if is_es_6 and repo_name:
            # file-content search scoped to this repository
            def query_modifier():
                qry = '{} repo_name.raw:{} '.format(
                    query, searcher.escape_specials(repo_name))
                return {'q': qry, 'type': 'content'}
            label = u'Search for `{}` through files in this repository.'.format(query)
            queries.append(
                {
                    'id': -10,
                    'value': query,
                    'value_display': label,
                    'type': 'search',
                    'url': h.route_path(
                        'search_repo', repo_name=repo_name, _query=query_modifier())
                }
            )

            # commit-message search scoped to this repository.
            # NOTE(review): this rebinds `query_modifier`, shadowing the one
            # above — safe because each is called before the next definition
            def query_modifier():
                qry = '{} repo_name.raw:{} '.format(
                    query, searcher.escape_specials(repo_name))
                return {'q': qry, 'type': 'commit'}
            label = u'Search for `{}` through commits in this repository.'.format(query)
            queries.append(
                {
                    'id': -10,
                    'value': query,
                    'value_display': label,
                    'type': 'search',
                    'url': h.route_path(
                        'search_repo', repo_name=repo_name, _query=query_modifier())
                }
            )

        elif is_es_6 and repo_group_name:
            # file-content search scoped to every repo under this group
            def query_modifier():
                qry = '{} repo_name.raw:{} '.format(
                    query, searcher.escape_specials(repo_group_name + '/*'))
                return {'q': qry, 'type': 'content'}
            label = u'Search for `{}` through files in this repository group'.format(query)
            queries.append(
                {
                    'id': -20,
                    'value': query,
                    'value_display': label,
                    'type': 'search',
                    'url': h.route_path('search', _query=query_modifier())
                }
            )

        # fallback: plain global full-text search (also covers non-ES6 backends)
        if not queries:
            queries.append(
                {
                    'id': -1,
                    'value': query,
                    'value_display': u'Search for: `{}`'.format(query),
                    'type': 'search',
                    'url': h.route_path('search',
                                        _query={'q': query, 'type': 'content'})
                }
            )

        return queries
382
306 @LoginRequired()
383 @LoginRequired()
307 @view_config(
384 @view_config(
308 route_name='goto_switcher_data', request_method='GET',
385 route_name='goto_switcher_data', request_method='GET',
@@ -315,26 +392,21 b' class HomeView(BaseAppView):'
315 query = self.request.GET.get('query')
392 query = self.request.GET.get('query')
316 log.debug('generating main filter data, query %s', query)
393 log.debug('generating main filter data, query %s', query)
317
394
318 default_search_val = u'Full text search for: `{}`'.format(query)
319 res = []
395 res = []
320 if not query:
396 if not query:
321 return {'suggestions': res}
397 return {'suggestions': res}
322
398
323 res.append({
399 searcher = searcher_from_config(self.request.registry.settings)
324 'id': -1,
400 for _q in self._get_default_search_queries(self.request.GET, searcher, query):
325 'value': query,
401 res.append(_q)
326 'value_display': default_search_val,
402
327 'type': 'search',
403 repo_group_id = safe_int(self.request.GET.get('search_context[repo_group_id]'))
328 'url': h.route_path(
329 'search', _query={'q': query})
330 })
331 repo_group_id = safe_int(self.request.GET.get('repo_group_id'))
332 if repo_group_id:
404 if repo_group_id:
333 repo_group = RepoGroup.get(repo_group_id)
405 repo_group = RepoGroup.get(repo_group_id)
334 composed_hint = '{}/{}'.format(repo_group.group_name, query)
406 composed_hint = '{}/{}'.format(repo_group.group_name, query)
335 show_hint = not query.startswith(repo_group.group_name)
407 show_hint = not query.startswith(repo_group.group_name)
336 if repo_group and show_hint:
408 if repo_group and show_hint:
337 hint = u'Group search: `{}`'.format(composed_hint)
409 hint = u'Repository search inside: `{}`'.format(composed_hint)
338 res.append({
410 res.append({
339 'id': -1,
411 'id': -1,
340 'value': composed_hint,
412 'value': composed_hint,
@@ -351,7 +423,7 b' class HomeView(BaseAppView):'
351 for serialized_repo in repos:
423 for serialized_repo in repos:
352 res.append(serialized_repo)
424 res.append(serialized_repo)
353
425
354 # TODO(marcink): permissions for that ?
426 # TODO(marcink): should all logged in users be allowed to search others?
355 allowed_user_search = self._rhodecode_user.username != User.DEFAULT_USER
427 allowed_user_search = self._rhodecode_user.username != User.DEFAULT_USER
356 if allowed_user_search:
428 if allowed_user_search:
357 users = self._get_user_list(query)
429 users = self._get_user_list(query)
@@ -362,7 +434,7 b' class HomeView(BaseAppView):'
362 for serialized_user_group in user_groups:
434 for serialized_user_group in user_groups:
363 res.append(serialized_user_group)
435 res.append(serialized_user_group)
364
436
365 commits = self._get_hash_commit_list(c.auth_user, query)
437 commits = self._get_hash_commit_list(c.auth_user, searcher, query)
366 if commits:
438 if commits:
367 unique_repos = collections.OrderedDict()
439 unique_repos = collections.OrderedDict()
368 for commit in commits:
440 for commit in commits:
@@ -45,11 +45,14 b' def search(request, tmpl_context, repo_n'
45 errors = []
45 errors = []
46 try:
46 try:
47 search_params = schema.deserialize(
47 search_params = schema.deserialize(
48 dict(search_query=request.GET.get('q'),
48 dict(
49 search_type=request.GET.get('type'),
49 search_query=request.GET.get('q'),
50 search_sort=request.GET.get('sort'),
50 search_type=request.GET.get('type'),
51 page_limit=request.GET.get('page_limit'),
51 search_sort=request.GET.get('sort'),
52 requested_page=request.GET.get('page'))
52 search_max_lines=request.GET.get('max_lines'),
53 page_limit=request.GET.get('page_limit'),
54 requested_page=request.GET.get('page'),
55 )
53 )
56 )
54 except validation_schema.Invalid as e:
57 except validation_schema.Invalid as e:
55 errors = e.children
58 errors = e.children
@@ -57,12 +60,13 b' def search(request, tmpl_context, repo_n'
57 def url_generator(**kw):
60 def url_generator(**kw):
58 q = urllib.quote(safe_str(search_query))
61 q = urllib.quote(safe_str(search_query))
59 return update_params(
62 return update_params(
60 "?q=%s&type=%s" % (q, safe_str(search_type)), **kw)
63 "?q=%s&type=%s&max_lines=%s" % (q, safe_str(search_type), search_max_lines), **kw)
61
64
62 c = tmpl_context
65 c = tmpl_context
63 search_query = search_params.get('search_query')
66 search_query = search_params.get('search_query')
64 search_type = search_params.get('search_type')
67 search_type = search_params.get('search_type')
65 search_sort = search_params.get('search_sort')
68 search_sort = search_params.get('search_sort')
69 search_max_lines = search_params.get('search_max_lines')
66 if search_params.get('search_query'):
70 if search_params.get('search_query'):
67 page_limit = search_params['page_limit']
71 page_limit = search_params['page_limit']
68 requested_page = search_params['requested_page']
72 requested_page = search_params['requested_page']
@@ -48,7 +48,6 b' import bleach'
48 from datetime import datetime
48 from datetime import datetime
49 from functools import partial
49 from functools import partial
50 from pygments.formatters.html import HtmlFormatter
50 from pygments.formatters.html import HtmlFormatter
51 from pygments import highlight as code_highlight
52 from pygments.lexers import (
51 from pygments.lexers import (
53 get_lexer_by_name, get_lexer_for_filename, get_lexer_for_mimetype)
52 get_lexer_by_name, get_lexer_for_filename, get_lexer_for_mimetype)
54
53
@@ -81,12 +80,14 b' from rhodecode.lib.utils2 import str2boo'
81 from rhodecode.lib.markup_renderer import MarkupRenderer, relative_links
80 from rhodecode.lib.markup_renderer import MarkupRenderer, relative_links
82 from rhodecode.lib.vcs.exceptions import CommitDoesNotExistError
81 from rhodecode.lib.vcs.exceptions import CommitDoesNotExistError
83 from rhodecode.lib.vcs.backends.base import BaseChangeset, EmptyCommit
82 from rhodecode.lib.vcs.backends.base import BaseChangeset, EmptyCommit
83 from rhodecode.lib.index.search_utils import get_matching_line_offsets
84 from rhodecode.config.conf import DATE_FORMAT, DATETIME_FORMAT
84 from rhodecode.config.conf import DATE_FORMAT, DATETIME_FORMAT
85 from rhodecode.model.changeset_status import ChangesetStatusModel
85 from rhodecode.model.changeset_status import ChangesetStatusModel
86 from rhodecode.model.db import Permission, User, Repository
86 from rhodecode.model.db import Permission, User, Repository
87 from rhodecode.model.repo_group import RepoGroupModel
87 from rhodecode.model.repo_group import RepoGroupModel
88 from rhodecode.model.settings import IssueTrackerSettingsModel
88 from rhodecode.model.settings import IssueTrackerSettingsModel
89
89
90
90 log = logging.getLogger(__name__)
91 log = logging.getLogger(__name__)
91
92
92
93
@@ -260,6 +261,21 b' def files_breadcrumbs(repo_name, commit_'
260 return literal('/'.join(url_segments))
261 return literal('/'.join(url_segments))
261
262
262
263
264 def code_highlight(code, lexer, formatter, use_hl_filter=False):
265 """
266 Lex ``code`` with ``lexer`` and format it with the formatter ``formatter``.
267
268 If ``outfile`` is given and a valid file object (an object
269 with a ``write`` method), the result will be written to it, otherwise
270 it is returned as a string.
271 """
272 if use_hl_filter:
273 # add HL filter
274 from rhodecode.lib.index import search_utils
275 lexer.add_filter(search_utils.ElasticSearchHLFilter())
276 return pygments.format(pygments.lex(code, lexer), formatter)
277
278
263 class CodeHtmlFormatter(HtmlFormatter):
279 class CodeHtmlFormatter(HtmlFormatter):
264 """
280 """
265 My code Html Formatter for source codes
281 My code Html Formatter for source codes
@@ -386,110 +402,9 b' class SearchContentCodeHtmlFormatter(Cod'
386
402
387 current_line_number += 1
403 current_line_number += 1
388
404
389
390 yield 0, '</table>'
405 yield 0, '</table>'
391
406
392
407
393 def extract_phrases(text_query):
394 """
395 Extracts phrases from search term string making sure phrases
396 contained in double quotes are kept together - and discarding empty values
397 or fully whitespace values eg.
398
399 'some text "a phrase" more' => ['some', 'text', 'a phrase', 'more']
400
401 """
402
403 in_phrase = False
404 buf = ''
405 phrases = []
406 for char in text_query:
407 if in_phrase:
408 if char == '"': # end phrase
409 phrases.append(buf)
410 buf = ''
411 in_phrase = False
412 continue
413 else:
414 buf += char
415 continue
416 else:
417 if char == '"': # start phrase
418 in_phrase = True
419 phrases.append(buf)
420 buf = ''
421 continue
422 elif char == ' ':
423 phrases.append(buf)
424 buf = ''
425 continue
426 else:
427 buf += char
428
429 phrases.append(buf)
430 phrases = [phrase.strip() for phrase in phrases if phrase.strip()]
431 return phrases
432
433
434 def get_matching_offsets(text, phrases):
435 """
436 Returns a list of string offsets in `text` that the list of `terms` match
437
438 >>> get_matching_offsets('some text here', ['some', 'here'])
439 [(0, 4), (10, 14)]
440
441 """
442 offsets = []
443 for phrase in phrases:
444 for match in re.finditer(phrase, text):
445 offsets.append((match.start(), match.end()))
446
447 return offsets
448
449
450 def normalize_text_for_matching(x):
451 """
452 Replaces all non alnum characters to spaces and lower cases the string,
453 useful for comparing two text strings without punctuation
454 """
455 return re.sub(r'[^\w]', ' ', x.lower())
456
457
458 def get_matching_line_offsets(lines, terms):
459 """ Return a set of `lines` indices (starting from 1) matching a
460 text search query, along with `context` lines above/below matching lines
461
462 :param lines: list of strings representing lines
463 :param terms: search term string to match in lines eg. 'some text'
464 :param context: number of lines above/below a matching line to add to result
465 :param max_lines: cut off for lines of interest
466 eg.
467
468 text = '''
469 words words words
470 words words words
471 some text some
472 words words words
473 words words words
474 text here what
475 '''
476 get_matching_line_offsets(text, 'text', context=1)
477 {3: [(5, 9)], 6: [(0, 4)]]
478
479 """
480 matching_lines = {}
481 phrases = [normalize_text_for_matching(phrase)
482 for phrase in extract_phrases(terms)]
483
484 for line_index, line in enumerate(lines, start=1):
485 match_offsets = get_matching_offsets(
486 normalize_text_for_matching(line), phrases)
487 if match_offsets:
488 matching_lines[line_index] = match_offsets
489
490 return matching_lines
491
492
493 def hsv_to_rgb(h, s, v):
408 def hsv_to_rgb(h, s, v):
494 """ Convert hsv color values to rgb """
409 """ Convert hsv color values to rgb """
495
410
@@ -1904,25 +1819,6 b' def journal_filter_help(request):'
1904 ).format(actions=actions)
1819 ).format(actions=actions)
1905
1820
1906
1821
1907 def search_filter_help(searcher, request):
1908 _ = request.translate
1909
1910 terms = ''
1911 return _(
1912 'Example filter terms for `{searcher}` search:\n' +
1913 '{terms}\n' +
1914 'Generate wildcards using \'*\' character:\n' +
1915 ' "repo_name:vcs*" - search everything starting with \'vcs\'\n' +
1916 ' "repo_name:*vcs*" - search for repository containing \'vcs\'\n' +
1917 '\n' +
1918 'Optional AND / OR operators in queries\n' +
1919 ' "repo_name:vcs OR repo_name:test"\n' +
1920 ' "owner:test AND repo_name:test*"\n' +
1921 'More: {search_doc}'
1922 ).format(searcher=searcher.name,
1923 terms=terms, search_doc=searcher.query_lang_doc)
1924
1925
1926 def not_mapped_error(repo_name):
1822 def not_mapped_error(repo_name):
1927 from rhodecode.translation import _
1823 from rhodecode.translation import _
1928 flash(_('%s repository is not mapped to db perhaps'
1824 flash(_('%s repository is not mapped to db perhaps'
@@ -2107,3 +2003,15 b' def go_import_header(request, db_repo=No'
2107 def reviewer_as_json(*args, **kwargs):
2003 def reviewer_as_json(*args, **kwargs):
2108 from rhodecode.apps.repository.utils import reviewer_as_json as _reviewer_as_json
2004 from rhodecode.apps.repository.utils import reviewer_as_json as _reviewer_as_json
2109 return _reviewer_as_json(*args, **kwargs)
2005 return _reviewer_as_json(*args, **kwargs)
2006
2007
2008 def get_repo_view_type(request):
2009 route_name = request.matched_route.name
2010 route_to_view_type = {
2011 'repo_changelog': 'changelog',
2012 'repo_files': 'files',
2013 'repo_summary': 'summary',
2014 'repo_commit': 'commit'
2015
2016 }
2017 return route_to_view_type.get(route_name)
@@ -25,15 +25,27 b' Index schema for RhodeCode'
25 import importlib
25 import importlib
26 import logging
26 import logging
27
27
28 from rhodecode.lib.index.search_utils import normalize_text_for_matching
29
28 log = logging.getLogger(__name__)
30 log = logging.getLogger(__name__)
29
31
30 # leave defaults for backward compat
32 # leave defaults for backward compat
31 default_searcher = 'rhodecode.lib.index.whoosh'
33 default_searcher = 'rhodecode.lib.index.whoosh'
32 default_location = '%(here)s/data/index'
34 default_location = '%(here)s/data/index'
33
35
36 ES_VERSION_2 = '2'
37 ES_VERSION_6 = '6'
38 # for legacy reasons we keep 2 compat as default
39 DEFAULT_ES_VERSION = ES_VERSION_2
34
40
35 class BaseSearch(object):
41 from rhodecode_tools.lib.fts_index.elasticsearch_engine_6 import \
42 ES_CONFIG # pragma: no cover
43
44
45 class BaseSearcher(object):
36 query_lang_doc = ''
46 query_lang_doc = ''
47 es_version = None
48 name = None
37
49
38 def __init__(self):
50 def __init__(self):
39 pass
51 pass
@@ -45,15 +57,42 b' class BaseSearch(object):'
45 raise_on_exc=True):
57 raise_on_exc=True):
46 raise Exception('NotImplemented')
58 raise Exception('NotImplemented')
47
59
60 @staticmethod
61 def query_to_mark(query, default_field=None):
62 """
63 Formats the query to mark token for jquery.mark.js highlighting. ES could
64 have a different format optionally.
48
65
49 def searcher_from_config(config, prefix='search.'):
66 :param default_field:
67 :param query:
68 """
69 return ' '.join(normalize_text_for_matching(query).split())
70
71 @property
72 def is_es_6(self):
73 return self.es_version == ES_VERSION_6
74
75 def get_handlers(self):
76 return {}
77
78
79 def search_config(config, prefix='search.'):
50 _config = {}
80 _config = {}
51 for key in config.keys():
81 for key in config.keys():
52 if key.startswith(prefix):
82 if key.startswith(prefix):
53 _config[key[len(prefix):]] = config[key]
83 _config[key[len(prefix):]] = config[key]
84 return _config
85
86
87 def searcher_from_config(config, prefix='search.'):
88 _config = search_config(config, prefix)
54
89
55 if 'location' not in _config:
90 if 'location' not in _config:
56 _config['location'] = default_location
91 _config['location'] = default_location
92 if 'es_version' not in _config:
93 # use old legacy ES version set to 2
94 _config['es_version'] = '2'
95
57 imported = importlib.import_module(_config.get('module', default_searcher))
96 imported = importlib.import_module(_config.get('module', default_searcher))
58 searcher = imported.Search(config=_config)
97 searcher = imported.Searcher(config=_config)
59 return searcher
98 return searcher
@@ -33,7 +33,7 b' from whoosh.index import create_in, open'
33 from whoosh.qparser import QueryParser, QueryParserError
33 from whoosh.qparser import QueryParser, QueryParserError
34
34
35 import rhodecode.lib.helpers as h
35 import rhodecode.lib.helpers as h
36 from rhodecode.lib.index import BaseSearch
36 from rhodecode.lib.index import BaseSearcher
37 from rhodecode.lib.utils2 import safe_unicode
37 from rhodecode.lib.utils2 import safe_unicode
38
38
39 log = logging.getLogger(__name__)
39 log = logging.getLogger(__name__)
@@ -59,13 +59,13 b' FRAGMENTER = ContextFragmenter(200)'
59 log = logging.getLogger(__name__)
59 log = logging.getLogger(__name__)
60
60
61
61
62 class Search(BaseSearch):
62 class WhooshSearcher(BaseSearcher):
63 # this also shows in UI
63 # this also shows in UI
64 query_lang_doc = 'http://whoosh.readthedocs.io/en/latest/querylang.html'
64 query_lang_doc = 'http://whoosh.readthedocs.io/en/latest/querylang.html'
65 name = 'whoosh'
65 name = 'whoosh'
66
66
67 def __init__(self, config):
67 def __init__(self, config):
68 super(Search, self).__init__()
68 super(Searcher, self).__init__()
69 self.config = config
69 self.config = config
70 if not os.path.isdir(self.config['location']):
70 if not os.path.isdir(self.config['location']):
71 os.makedirs(self.config['location'])
71 os.makedirs(self.config['location'])
@@ -162,16 +162,17 b' class Search(BaseSearch):'
162 _ = translator
162 _ = translator
163 stats = [
163 stats = [
164 {'key': _('Index Type'), 'value': 'Whoosh'},
164 {'key': _('Index Type'), 'value': 'Whoosh'},
165 {'sep': True},
166
165 {'key': _('File Index'), 'value': str(self.file_index)},
167 {'key': _('File Index'), 'value': str(self.file_index)},
166 {'key': _('Indexed documents'),
168 {'key': _('Indexed documents'), 'value': self.file_index.doc_count()},
167 'value': self.file_index.doc_count()},
169 {'key': _('Last update'), 'value': h.time_to_datetime(self.file_index.last_modified())},
168 {'key': _('Last update'),
170
169 'value': h.time_to_datetime(self.file_index.last_modified())},
171 {'sep': True},
172
170 {'key': _('Commit index'), 'value': str(self.commit_index)},
173 {'key': _('Commit index'), 'value': str(self.commit_index)},
171 {'key': _('Indexed documents'),
174 {'key': _('Indexed documents'), 'value': str(self.commit_index.doc_count())},
172 'value': str(self.commit_index.doc_count())},
175 {'key': _('Last update'), 'value': h.time_to_datetime(self.commit_index.last_modified())}
173 {'key': _('Last update'),
174 'value': h.time_to_datetime(self.commit_index.last_modified())}
175 ]
176 ]
176 return stats
177 return stats
177
178
@@ -227,6 +228,9 b' class Search(BaseSearch):'
227 return self.searcher
228 return self.searcher
228
229
229
230
231 Searcher = WhooshSearcher
232
233
230 class WhooshResultWrapper(object):
234 class WhooshResultWrapper(object):
231 def __init__(self, search_type, total_hits, results):
235 def __init__(self, search_type, total_hits, results):
232 self.search_type = search_type
236 self.search_type = search_type
@@ -263,6 +267,8 b' class WhooshResultWrapper(object):'
263 # TODO: marcink: this feels like an overkill, there's a lot of data
267 # TODO: marcink: this feels like an overkill, there's a lot of data
264 # inside hit object, and we don't need all
268 # inside hit object, and we don't need all
265 res = dict(hit)
269 res = dict(hit)
270 # elastic search uses that, we set it empty so it fallbacks to regular HL logic
271 res['content_highlight'] = ''
266
272
267 f_path = '' # pragma: no cover
273 f_path = '' # pragma: no cover
268 if self.search_type in ['content', 'path']:
274 if self.search_type in ['content', 'path']:
@@ -1009,3 +1009,14 b' def glob2re(pat):'
1009 else:
1009 else:
1010 res = res + re.escape(c)
1010 res = res + re.escape(c)
1011 return res + '\Z(?ms)'
1011 return res + '\Z(?ms)'
1012
1013
1014 def parse_byte_string(size_str):
1015 match = re.match(r'(\d+)(MB|KB)', size_str, re.IGNORECASE)
1016 if not match:
1017 raise ValueError('Given size:%s is invalid, please make sure '
1018 'to use format of <num>(MB|KB)' % size_str)
1019
1020 _parts = match.groups()
1021 num, type_ = _parts
1022 return long(num) * {'mb': 1024*1024, 'kb': 1024}[type_.lower()]
@@ -58,7 +58,7 b' def author_name(author):'
58 to get the username
58 to get the username
59 """
59 """
60
60
61 if not author or not '@' in author:
61 if not author or '@' not in author:
62 return author
62 return author
63 else:
63 else:
64 return author.replace(author_email(author), '').replace('<', '')\
64 return author.replace(author_email(author), '').replace('<', '')\
@@ -34,6 +34,9 b' class SearchParamsSchema(colander.Mappin'
34 colander.String(),
34 colander.String(),
35 missing='newfirst',
35 missing='newfirst',
36 validator=colander.OneOf(['oldfirst', 'newfirst']))
36 validator=colander.OneOf(['oldfirst', 'newfirst']))
37 search_max_lines = colander.SchemaNode(
38 colander.Integer(),
39 missing=10)
37 page_limit = colander.SchemaNode(
40 page_limit = colander.SchemaNode(
38 colander.Integer(),
41 colander.Integer(),
39 missing=10,
42 missing=10,
@@ -572,6 +572,7 b' div.annotatediv { margin-left: 2px; marg'
572 .code-highlight, /* TODO: dan: merge codehilite into code-highlight */
572 .code-highlight, /* TODO: dan: merge codehilite into code-highlight */
573 /* This can be generated with `pygmentize -S default -f html` */
573 /* This can be generated with `pygmentize -S default -f html` */
574 .codehilite {
574 .codehilite {
575 .c-ElasticMatch { background-color: #faffa6; padding: 0.2em;}
575 .hll { background-color: #ffffcc }
576 .hll { background-color: #ffffcc }
576 .c { color: #408080; font-style: italic } /* Comment */
577 .c { color: #408080; font-style: italic } /* Comment */
577 .err, .codehilite .err { border: none } /* Error */
578 .err, .codehilite .err { border: none } /* Error */
@@ -640,6 +641,7 b' div.annotatediv { margin-left: 2px; marg'
640 .vi { color: #19177C } /* Name.Variable.Instance */
641 .vi { color: #19177C } /* Name.Variable.Instance */
641 .vm { color: #19177C } /* Name.Variable.Magic */
642 .vm { color: #19177C } /* Name.Variable.Magic */
642 .il { color: #666666 } /* Literal.Number.Integer.Long */
643 .il { color: #666666 } /* Literal.Number.Integer.Long */
644
643 }
645 }
644
646
645 /* customized pre blocks for markdown/rst */
647 /* customized pre blocks for markdown/rst */
@@ -166,7 +166,6 b' small,'
166
166
167 mark,
167 mark,
168 .mark {
168 .mark {
169 background-color: @rclightblue;
170 padding: .2em;
169 padding: .2em;
171 }
170 }
172
171
@@ -5,8 +5,13 b''
5 <div class="panel-body">
5 <div class="panel-body">
6 <dl class="dl-horizontal">
6 <dl class="dl-horizontal">
7 % for stat in c.statistics:
7 % for stat in c.statistics:
8 <dt>${stat['key']}</dt>
8 % if stat.get('sep'):
9 <dd>${stat['value']}</dd>
9 <dt></dt>
10 <dd>--</dd>
11 % else:
12 <dt>${stat['key']}</dt>
13 <dd>${stat['value']}</dd>
14 % endif
10 % endfor
15 % endfor
11 </dl>
16 </dl>
12 </div>
17 </div>
@@ -7,9 +7,12 b" go_import_header = ''"
7 if hasattr(c, 'rhodecode_db_repo'):
7 if hasattr(c, 'rhodecode_db_repo'):
8 c.template_context['repo_type'] = c.rhodecode_db_repo.repo_type
8 c.template_context['repo_type'] = c.rhodecode_db_repo.repo_type
9 c.template_context['repo_landing_commit'] = c.rhodecode_db_repo.landing_rev[1]
9 c.template_context['repo_landing_commit'] = c.rhodecode_db_repo.landing_rev[1]
10 ## check repo context
11 c.template_context['repo_view_type'] = h.get_repo_view_type(request)
10
12
11 if getattr(c, 'repo_group', None):
13 if getattr(c, 'repo_group', None):
12 c.template_context['repo_group_id'] = c.repo_group.group_id
14 c.template_context['repo_group_id'] = c.repo_group.group_id
15 c.template_context['repo_group_name'] = c.repo_group.group_name
13
16
14 if getattr(c, 'rhodecode_user', None) and c.rhodecode_user.user_id:
17 if getattr(c, 'rhodecode_user', None) and c.rhodecode_user.user_id:
15 c.template_context['rhodecode_user']['username'] = c.rhodecode_user.username
18 c.template_context['rhodecode_user']['username'] = c.rhodecode_user.username
@@ -23,6 +26,12 b" c.template_context['default_user'] = {"
23 'username': h.DEFAULT_USER,
26 'username': h.DEFAULT_USER,
24 'user_id': 1
27 'user_id': 1
25 }
28 }
29 c.template_context['search_context'] = {
30 'repo_group_id': c.template_context.get('repo_group_id'),
31 'repo_group_name': c.template_context.get('repo_group_name'),
32 'repo_name': c.template_context.get('repo_name'),
33 'repo_view_type': c.template_context.get('repo_view_type'),
34 }
26
35
27 %>
36 %>
28 <html xmlns="http://www.w3.org/1999/xhtml">
37 <html xmlns="http://www.w3.org/1999/xhtml">
@@ -18,10 +18,7 b''
18 %else:
18 %else:
19 ${_('Search inside all accessible repositories')}
19 ${_('Search inside all accessible repositories')}
20 %endif
20 %endif
21 %if c.cur_query:
21
22 &raquo;
23 ${c.cur_query}
24 %endif
25 </%def>
22 </%def>
26
23
27 <%def name="menu_bar_nav()">
24 <%def name="menu_bar_nav()">
@@ -59,7 +56,8 b''
59 <div class="fields">
56 <div class="fields">
60 ${h.text('q', c.cur_query, placeholder="Enter query...")}
57 ${h.text('q', c.cur_query, placeholder="Enter query...")}
61
58
62 ${h.select('type',c.search_type,[('content',_('File contents')), ('commit',_('Commit messages')), ('path',_('File names')),],id='id_search_type')}
59 ${h.select('type',c.search_type,[('content',_('Files')), ('path',_('File path')),('commit',_('Commits'))],id='id_search_type')}
60 ${h.hidden('max_lines', '10')}
63 <input type="submit" value="${_('Search')}" class="btn"/>
61 <input type="submit" value="${_('Search')}" class="btn"/>
64 <br/>
62 <br/>
65
63
@@ -72,8 +70,54 b''
72 </span>
70 </span>
73 % endfor
71 % endfor
74 <div class="field">
72 <div class="field">
75 <p class="filterexample" style="position: inherit" onclick="$('#search-help').toggle()">${_('Example Queries')}</p>
73 <p class="filterexample" style="position: inherit" onclick="$('#search-help').toggle()">${_('Query Langague examples')}</p>
76 <pre id="search-help" style="display: none">${h.tooltip(h.search_filter_help(c.searcher, request))}</pre>
74 <pre id="search-help" style="display: none">\
75
76 % if c.searcher.name == 'whoosh':
77 Example filter terms for `Whoosh` search:
78 query lang: <a href="${c.searcher.query_lang_doc}">Whoosh Query Language</a>
79 Whoosh has limited query capabilities. For advanced search use ElasticSearch 6 from RhodeCode EE edition.
80
81 Generate wildcards using '*' character:
82 "repo_name:vcs*" - search everything starting with 'vcs'
83 "repo_name:*vcs*" - search for repository containing 'vcs'
84
85 Optional AND / OR operators in queries
86 "repo_name:vcs OR repo_name:test"
87 "owner:test AND repo_name:test*" AND extension:py
88
89 Move advanced search is available via ElasticSearch6 backend in EE edition.
90 % elif c.searcher.name == 'elasticsearch' and c.searcher.es_version == '2':
91 Example filter terms for `ElasticSearch-${c.searcher.es_version}`search:
92 ElasticSearch-2 has limited query capabilities. For advanced search use ElasticSearch 6 from RhodeCode EE edition.
93
94 search type: content (File Content)
95 indexed fields: content
96
97 # search for `fix` string in all files
98 fix
99
100 search type: commit (Commit message)
101 indexed fields: message
102
103 search type: path (File name)
104 indexed fields: path
105
106 % else:
107 Example filter terms for `ElasticSearch-${c.searcher.es_version}`search:
108 query lang: <a href="${c.searcher.query_lang_doc}">ES 6 Query Language</a>
109 The reserved characters needed espace by `\`: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
110 % for handler in c.searcher.get_handlers().values():
111
112 search type: ${handler.search_type_label}
113 *indexed fields*: ${', '.join( [('\n ' if x[0]%4==0 else '')+x[1] for x in enumerate(handler.es_6_field_names)])}
114 % for entry in handler.es_6_example_queries:
115 ${entry.rstrip()}
116 % endfor
117 % endfor
118
119 % endif
120 </pre>
77 </div>
121 </div>
78
122
79 <div class="field">${c.runtime}</div>
123 <div class="field">${c.runtime}</div>
@@ -96,6 +140,7 b''
96 </div>
140 </div>
97 <script>
141 <script>
98 $(document).ready(function(){
142 $(document).ready(function(){
143 $('#q').autoGrowInput();
99 $("#id_search_type").select2({
144 $("#id_search_type").select2({
100 'containerCssClass': "drop-menu",
145 'containerCssClass': "drop-menu",
101 'dropdownCssClass': "drop-menu-dropdown",
146 'dropdownCssClass': "drop-menu-dropdown",
@@ -1,5 +1,7 b''
1 <%namespace name="base" file="/base/base.mako"/>
1 <%namespace name="base" file="/base/base.mako"/>
2
2
3 % if c.formatted_results:
4
3 <table class="rctable search-results">
5 <table class="rctable search-results">
4 <tr>
6 <tr>
5 <th>${_('Repository')}</th>
7 <th>${_('Repository')}</th>
@@ -50,14 +52,20 b''
50 </td>
52 </td>
51
53
52 <td class="td-user author">
54 <td class="td-user author">
53 ${base.gravatar_with_user(entry['author'])}
55 <%
56 ## es6 stores this as object
57 author = entry['author']
58 if isinstance(author, dict):
59 author = author['email']
60 %>
61 ${base.gravatar_with_user(author)}
54 </td>
62 </td>
55 </tr>
63 </tr>
56 % endif
64 % endif
57 %endfor
65 %endfor
58 </table>
66 </table>
59
67
60 %if c.cur_query and c.formatted_results:
68 %if c.cur_query:
61 <div class="pagination-wh pagination-left">
69 <div class="pagination-wh pagination-left">
62 ${c.formatted_results.pager('$link_previous ~2~ $link_next')}
70 ${c.formatted_results.pager('$link_previous ~2~ $link_next')}
63 </div>
71 </div>
@@ -79,4 +87,16 b''
79 target_expand.addClass('open');
87 target_expand.addClass('open');
80 }
88 }
81 });
89 });
90
91 $(".message.td-description").mark(
92 "${c.searcher.query_to_mark(c.cur_query, 'message')}",
93 {
94 "className": 'match',
95 "accuracy": "complementary",
96 "ignorePunctuation": ":._(){}[]!'+=".split("")
97 }
98 );
99
82 </script>
100 </script>
101
102 % endif
@@ -1,33 +1,10 b''
1 <%def name="highlight_text_file(terms, text, url, line_context=3,
2 max_lines=10,
3 mimetype=None, filepath=None)">
4 <%
5 lines = text.split('\n')
6 lines_of_interest = set()
7 matching_lines = h.get_matching_line_offsets(lines, terms)
8 shown_matching_lines = 0
9
1
10 for line_number in matching_lines:
2 <%def name="highlight_text_file(has_matched_content, file_content, lexer, html_formatter, matching_lines, shown_matching_lines, url, use_hl_filter)">
11 if len(lines_of_interest) < max_lines:
3 % if has_matched_content:
12 lines_of_interest |= set(range(
4 ${h.code_highlight(file_content, lexer, html_formatter, use_hl_filter=use_hl_filter)|n}
13 max(line_number - line_context, 0),
5 % else:
14 min(line_number + line_context, len(lines) + 1)))
6 ${_('No content matched')} <br/>
15 shown_matching_lines += 1
7 % endif
16
17 %>
18 ${h.code_highlight(
19 text,
20 h.get_lexer_safe(
21 mimetype=mimetype,
22 filepath=filepath,
23 ),
24 h.SearchContentCodeHtmlFormatter(
25 linenos=True,
26 cssclass="code-highlight",
27 url=url,
28 query_terms=terms,
29 only_line_numbers=lines_of_interest
30 ))|n}
31
8
32 %if len(matching_lines) > shown_matching_lines:
9 %if len(matching_lines) > shown_matching_lines:
33 <a href="${url}">
10 <a href="${url}">
@@ -37,12 +14,52 b' for line_number in matching_lines:'
37 </%def>
14 </%def>
38
15
39 <div class="search-results">
16 <div class="search-results">
17 <% query_mark = c.searcher.query_to_mark(c.cur_query, 'content') %>
18
40 %for entry in c.formatted_results:
19 %for entry in c.formatted_results:
20
21 <%
22 file_content = entry['content_highlight'] or entry['content']
23 mimetype = entry.get('mimetype')
24 filepath = entry.get('path')
25 max_lines = h.safe_int(request.GET.get('max_lines', '10'))
26 line_context = h.safe_int(request.GET.get('line_contenxt', '3'))
27
28 match_file_url=h.route_path('repo_files',repo_name=entry['repository'], commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path'], _query={"mark": query_mark})
29 terms = c.cur_query
30
31 if c.searcher.is_es_6:
32 # use empty terms so we default to markers usage
33 total_lines, matching_lines = h.get_matching_line_offsets(file_content, terms=None)
34 else:
35 total_lines, matching_lines = h.get_matching_line_offsets(file_content, terms)
36
37 shown_matching_lines = 0
38 lines_of_interest = set()
39 for line_number in matching_lines:
40 if len(lines_of_interest) < max_lines:
41 lines_of_interest |= set(range(
42 max(line_number - line_context, 0),
43 min(line_number + line_context, total_lines + 1)))
44 shown_matching_lines += 1
45 lexer = h.get_lexer_safe(mimetype=mimetype, filepath=filepath)
46
47 html_formatter = h.SearchContentCodeHtmlFormatter(
48 linenos=True,
49 cssclass="code-highlight",
50 url=match_file_url,
51 query_terms=terms,
52 only_line_numbers=lines_of_interest
53 )
54
55 has_matched_content = len(lines_of_interest) >= 1
56
57 %>
41 ## search results are additionally filtered, and this check is just a safe gate
58 ## search results are additionally filtered, and this check is just a safe gate
42 % if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(entry['repository'], 'search results content check'):
59 % if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(entry['repository'], 'search results content check'):
43 <div id="codeblock" class="codeblock">
60 <div id="codeblock" class="codeblock">
44 <div class="codeblock-header">
61 <div class="codeblock-header">
45 <h2>
62 <h1>
46 %if h.get_repo_type_by_name(entry.get('repository')) == 'hg':
63 %if h.get_repo_type_by_name(entry.get('repository')) == 'hg':
47 <i class="icon-hg"></i>
64 <i class="icon-hg"></i>
48 %elif h.get_repo_type_by_name(entry.get('repository')) == 'git':
65 %elif h.get_repo_type_by_name(entry.get('repository')) == 'git':
@@ -51,18 +68,39 b' for line_number in matching_lines:'
51 <i class="icon-svn"></i>
68 <i class="icon-svn"></i>
52 %endif
69 %endif
53 ${h.link_to(entry['repository'], h.route_path('repo_summary',repo_name=entry['repository']))}
70 ${h.link_to(entry['repository'], h.route_path('repo_summary',repo_name=entry['repository']))}
54 </h2>
71 </h1>
72
55 <div class="stats">
73 <div class="stats">
56 ${h.link_to(h.literal(entry['f_path']), h.route_path('repo_files',repo_name=entry['repository'],commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path']))}
74 <span class="stats-filename">
57 %if entry.get('lines'):
75 <strong>
58 | ${entry.get('lines', 0.)} ${_ungettext('line', 'lines', entry.get('lines', 0.))}
76 <i class="icon-file-text"></i>
59 %endif
77 ${h.link_to(h.literal(entry['f_path']), h.route_path('repo_files',repo_name=entry['repository'],commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path']))}
60 %if entry.get('size'):
78 </strong>
61 | ${h.format_byte_size_binary(entry['size'])}
79 </span>
62 %endif
80 <span class="item last"><i class="tooltip icon-clipboard clipboard-action" data-clipboard-text="${entry['f_path']}" title="${_('Copy the full path')}"></i></span>
63 %if entry.get('mimetype'):
81 <br/>
64 | ${entry.get('mimetype', "unknown mimetype")}
82 <span class="stats-first-item">
65 %endif
83 ${len(matching_lines)} ${_ungettext('search match', 'search matches', len(matching_lines))}
84 </span>
85
86 <span >
87 %if entry.get('lines'):
88 | ${entry.get('lines', 0.)} ${_ungettext('line', 'lines', entry.get('lines', 0.))}
89 %endif
90 </span>
91
92 <span>
93 %if entry.get('size'):
94 | ${h.format_byte_size_binary(entry['size'])}
95 %endif
96 </span>
97
98 <span>
99 %if entry.get('mimetype'):
100 | ${entry.get('mimetype', "unknown mimetype")}
101 %endif
102 </span>
103
66 </div>
104 </div>
67 <div class="buttons">
105 <div class="buttons">
68 <a id="file_history_overview_full" href="${h.route_path('repo_changelog_file',repo_name=entry.get('repository',''),commit_id=entry.get('commit_id', 'tip'),f_path=entry.get('f_path',''))}">
106 <a id="file_history_overview_full" href="${h.route_path('repo_changelog_file',repo_name=entry.get('repository',''),commit_id=entry.get('commit_id', 'tip'),f_path=entry.get('f_path',''))}">
@@ -74,10 +112,19 b' for line_number in matching_lines:'
74 </div>
112 </div>
75 </div>
113 </div>
76 <div class="code-body search-code-body">
114 <div class="code-body search-code-body">
77 ${highlight_text_file(c.cur_query, entry['content'],
115
78 url=h.route_path('repo_files',repo_name=entry['repository'],commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path']),
116 ${highlight_text_file(
79 mimetype=entry.get('mimetype'), filepath=entry.get('path'))}
117 has_matched_content=has_matched_content,
118 file_content=file_content,
119 lexer=lexer,
120 html_formatter=html_formatter,
121 matching_lines=matching_lines,
122 shown_matching_lines=shown_matching_lines,
123 url=match_file_url,
124 use_hl_filter=c.searcher.is_es_6
125 )}
80 </div>
126 </div>
127
81 </div>
128 </div>
82 % endif
129 % endif
83 %endfor
130 %endfor
@@ -91,10 +138,14 b' for line_number in matching_lines:'
91 %if c.cur_query:
138 %if c.cur_query:
92 <script type="text/javascript">
139 <script type="text/javascript">
93 $(function(){
140 $(function(){
94 $(".code").mark(
141 $(".search-code-body").mark(
95 '${' '.join(h.normalize_text_for_matching(c.cur_query).split())}',
142 "${query_mark}",
96 {"className": 'match',
143 {
97 });
144 "className": 'match',
145 "accuracy": "complementary",
146 "ignorePunctuation": ":._(){}[]!'+=".split("")
147 }
148 );
98 })
149 })
99 </script>
150 </script>
100 %endif No newline at end of file
151 %endif
@@ -1,3 +1,5 b''
1 % if c.formatted_results:
2
1 <table class="rctable search-results">
3 <table class="rctable search-results">
2 <tr>
4 <tr>
3 <th>${_('Repository')}</th>
5 <th>${_('Repository')}</th>
@@ -27,8 +29,10 b''
27 %endfor
29 %endfor
28 </table>
30 </table>
29
31
30 %if c.cur_query and c.formatted_results:
32 %if c.cur_query:
31 <div class="pagination-wh pagination-left">
33 <div class="pagination-wh pagination-left">
32 ${c.formatted_results.pager('$link_previous ~2~ $link_next')}
34 ${c.formatted_results.pager('$link_previous ~2~ $link_next')}
33 </div>
35 </div>
34 %endif No newline at end of file
36 %endif
37
38 % endif
@@ -208,44 +208,3 b' def test_get_visual_attr(baseapp):'
208 def test_chop_at(test_text, inclusive, expected_text):
208 def test_chop_at(test_text, inclusive, expected_text):
209 assert helpers.chop_at_smart(
209 assert helpers.chop_at_smart(
210 test_text, '\n', inclusive, '...') == expected_text
210 test_text, '\n', inclusive, '...') == expected_text
211
212
213 @pytest.mark.parametrize('test_text, expected_output', [
214 ('some text', ['some', 'text']),
215 ('some text', ['some', 'text']),
216 ('some text "with a phrase"', ['some', 'text', 'with a phrase']),
217 ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
218 ('"justphrase"', ['justphrase']),
219 ('""', []),
220 ('', []),
221 (' ', []),
222 ('" "', []),
223 ])
224 def test_extract_phrases(test_text, expected_output):
225 assert helpers.extract_phrases(test_text) == expected_output
226
227
228 @pytest.mark.parametrize('test_text, text_phrases, expected_output', [
229 ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
230 ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
231 ('irrelevant', ['not found'], []),
232 ('irrelevant', ['not found'], []),
233 ])
234 def test_get_matching_offsets(test_text, text_phrases, expected_output):
235 assert helpers.get_matching_offsets(
236 test_text, text_phrases) == expected_output
237
238
239 def test_normalize_text_for_matching():
240 assert helpers.normalize_text_for_matching(
241 'OJjfe)*#$*@)$JF*)3r2f80h') == 'ojjfe jf 3r2f80h'
242
243
244 def test_get_matching_line_offsets():
245 assert helpers.get_matching_line_offsets([
246 'words words words',
247 'words words words',
248 'some text some',
249 'words words words',
250 'words words words',
251 'text here what'], 'text') == {3: [(5, 9)], 6: [(0, 4)]}
General Comments 0
You need to be logged in to leave comments. Login now