##// END OF EJS Templates
search: add support for elastic search 6...
dan -
r3319:b8fd1d7a default
parent child Browse files
Show More
@@ -0,0 +1,257 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2012-2018 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 import re
21
22 import pygments.filter
23 import pygments.filters
24 from pygments.token import Comment
25
26 HL_BEG_MARKER = '__RCSearchHLMarkBEG__'
27 HL_END_MARKER = '__RCSearchHLMarkEND__'
28 HL_MARKER_RE = '{}(.*?){}'.format(HL_BEG_MARKER, HL_END_MARKER)
29
30
class ElasticSearchHLFilter(pygments.filters.Filter):
    """
    Pygments stream filter that turns RhodeCode search highlight markers
    embedded in the token stream into ``Comment.ElasticMatch`` tokens.

    Text between ``HL_BEG_MARKER`` and ``HL_END_MARKER`` is re-emitted with
    the ``Comment.ElasticMatch`` token type; the markers themselves are
    dropped. Highlight state carries across token boundaries, so a match
    spanning multiple tokens is handled correctly.
    """
    _names = [HL_BEG_MARKER, HL_END_MARKER]

    def __init__(self, **options):
        pygments.filters.Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        beg_marker, end_marker = self._names
        split_pattern = '({}|{})'.format(beg_marker, end_marker)

        inside_match = False
        for ttype, value in stream:
            if beg_marker in value or end_marker in value:
                # token text contains marker(s): split it apart and toggle
                # the highlight state whenever a marker is encountered
                for piece in re.split(split_pattern, value):
                    if not piece:
                        continue
                    if piece == beg_marker:
                        inside_match = True
                    elif piece == end_marker:
                        inside_match = False
                    else:
                        yield (Comment.ElasticMatch if inside_match
                               else ttype), piece
            else:
                # no markers here; just re-emit, highlighted if we are
                # currently between a begin and end marker
                yield (Comment.ElasticMatch if inside_match
                       else ttype), value
66
67
def extract_phrases(text_query):
    """
    Extracts phrases from search term string making sure phrases
    contained in double quotes are kept together - and discarding empty values
    or fully whitespace values eg.

    'some text "a phrase" more' => ['some', 'text', 'a phrase', 'more']

    """
    # Splitting on the quote character makes segments alternate between
    # unquoted text (even indices) and quoted phrases (odd indices). An
    # unterminated trailing quote leaves the remainder as a single phrase,
    # which matches the behaviour of a character-by-character scan.
    collected = []
    for idx, segment in enumerate(text_query.split('"')):
        if idx % 2:
            # inside double quotes: keep as one phrase
            collected.append(segment)
        else:
            # outside quotes: split on single space characters only
            collected.extend(segment.split(' '))

    # drop empty / whitespace-only entries and trim the rest
    return [phrase.strip() for phrase in collected if phrase.strip()]
107
108
def get_matching_phrase_offsets(text, phrases):
    """
    Returns a list of string offsets in `text` that the list of `terms` match

    >>> get_matching_phrase_offsets('some text here', ['some', 'here'])
    [(0, 4), (10, 14)]

    """
    # NOTE(review): each phrase is fed to re.finditer as a regex pattern;
    # callers appear to pass normalized alphanumeric terms, so regex
    # metacharacters should not occur — verify before passing raw input.
    matches = []
    for phrase in (phrases or []):
        matches.extend(
            (found.start(), found.end())
            for found in re.finditer(phrase, text))
    return matches
125
126
def get_matching_markers_offsets(text, markers=None):
    """
    Returns a list of (start, end) string offsets in `text` covering each
    span delimited by matching begin/end markers.

    :param text: string to scan
    :param markers: list of regex patterns describing marker pairs; when
        not given, defaults to the ElasticSearch highlight marker pattern
        (``HL_MARKER_RE``)

    >>> get_matching_markers_offsets('$1some$2 text $1here$2 marked', [r'\$1(.*?)\$2'])
    [(0, 8), (14, 22)]

    """
    # `markers or [...]` always yields a non-empty list, so the previous
    # `if markers:` guard around the loop was dead code and is removed.
    markers = markers or [HL_MARKER_RE]
    offsets = []

    for mark in markers:
        for match in re.finditer(mark, text):
            offsets.append((match.start(), match.end()))

    return offsets
144
145
def normalize_text_for_matching(x):
    """
    Lower-cases `x` and replaces every non-alphanumeric character with a
    space, useful for comparing two text strings without punctuation.
    """
    lowered = x.lower()
    return re.sub(r'[^\w]', ' ', lowered)
152
153
def get_matching_line_offsets(lines, terms=None, markers=None):
    """
    Return a 2-tuple ``(total_lines, matching_lines)`` for a text search.

    :param lines: text to scan; despite the name this is a single string —
        it is split with ``.splitlines()`` below
    :param terms: search term string to match in lines eg. 'some text'
    :param markers: instead of terms, use highlight marker regexes that
        mark beginning and end for a matched item. eg. ['START(.*?)END']
    :return: ``(number of lines scanned, {line_number: [(start, end), ...]})``
        with line numbers starting from 1

    eg.

    text = '''
    words words words
    words words words
    some text some
    words words words
    words words words
    text here what
    '''
    get_matching_line_offsets(text, 'text')
    6, {3: [(5, 9)], 6: [(0, 4)]}

    """
    matching_lines = {}
    # keeps the last enumerated line number; 0 when the input is empty
    line_index = 0

    if terms:
        # free-text mode: normalize both terms and each line so matching
        # ignores case and punctuation
        phrases = [normalize_text_for_matching(phrase)
                   for phrase in extract_phrases(terms)]

        for line_index, line in enumerate(lines.splitlines(), start=1):
            normalized_line = normalize_text_for_matching(line)
            match_offsets = get_matching_phrase_offsets(normalized_line, phrases)
            if match_offsets:
                matching_lines[line_index] = match_offsets

    else:
        # marker mode: find spans delimited by highlight markers
        # (eg. ElasticSearch highlight output)
        markers = markers or [HL_MARKER_RE]
        for line_index, line in enumerate(lines.splitlines(), start=1):
            match_offsets = get_matching_markers_offsets(line, markers=markers)
            if match_offsets:
                matching_lines[line_index] = match_offsets

    return line_index, matching_lines
198
199
def lucene_query_parser():
    """
    Build and return a pyparsing grammar (the top-level ``expression``
    element) for a subset of the Lucene query language.

    Adapted from the pyparsing ``lucene_grammar`` example. Supports field
    queries (``field:value``), quoted phrases, fuzzy (``~0.5``) and
    proximity (``"..."~2``) modifiers, boosts (``^2``), inclusive/exclusive
    range searches and AND / OR / NOT operators (plus their ``&&`` / ``||``
    / ``!`` aliases). pyparsing is imported lazily so the module does not
    require it unless this parser is actually used.
    """
    # from pyparsing lucene_grammar
    from pyparsing import (
        Literal, CaselessKeyword, Forward, Regex, QuotedString, Suppress,
        Optional, Group, infixNotation, opAssoc, ParserElement, pyparsing_common)

    # packrat memoization speeds up the recursive grammar considerably
    ParserElement.enablePackrat()

    COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, ":[]{}~^")
    LPAR, RPAR = map(Suppress, "()")
    and_, or_, not_, to_ = map(CaselessKeyword, "AND OR NOT TO".split())
    keyword = and_ | or_ | not_ | to_

    expression = Forward()

    # a bare word; backslash escapes are unwrapped via the chr(127)
    # placeholder trick so '\\\\' survives the single-backslash strip
    valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word")
    valid_word.setParseAction(
        lambda t: t[0]
        .replace('\\\\', chr(127))
        .replace('\\', '')
        .replace(chr(127), '\\')
    )

    string = QuotedString('"')

    required_modifier = Literal("+")("required")
    prohibit_modifier = Literal("-")("prohibit")
    integer = Regex(r"\d+").setParseAction(lambda t: int(t[0]))
    proximity_modifier = Group(TILDE + integer("proximity"))
    number = pyparsing_common.fnumber()
    fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

    term = Forward()
    field_name = valid_word().setName("fieldname")
    incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
    excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
    range_search = incl_range_search("incl_range") | excl_range_search("excl_range")
    boost = (CARAT + number("boost"))

    string_expr = Group(string + proximity_modifier) | string
    word_expr = Group(valid_word + fuzzy_modifier) | valid_word
    term << (Optional(field_name("field") + COLON) +
             (word_expr | string_expr | range_search | Group(
                 LPAR + expression + RPAR)) +
             Optional(boost))
    term.setParseAction(lambda t: [t] if 'field' in t or 'boost' in t else None)

    # operator precedence: +/- bind tightest, then NOT, AND, OR;
    # adjacency with no operator defaults to OR
    expression << infixNotation(
        term,
        [
            (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT),
            ((not_ | '!').setParseAction(lambda: "NOT"), 1, opAssoc.RIGHT),
            ((and_ | '&&').setParseAction(lambda: "AND"), 2, opAssoc.LEFT),
            (Optional(or_ | '||').setParseAction(lambda: "OR"), 2, opAssoc.LEFT),
        ]
    )

    return expression
@@ -0,0 +1,100 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2010-2018 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
21 import copy
22 import mock
23 import pytest
24
25 from rhodecode.lib.index import search_utils
26
27
@pytest.mark.parametrize('test_text, expected_output', [
    ('some text', ['some', 'text']),
    ('some text', ['some', 'text']),
    ('some text "with a phrase"', ['some', 'text', 'with a phrase']),
    ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
    ('"justphrase"', ['justphrase']),
    ('""', []),
    ('', []),
    (' ', []),
    ('" "', []),
])
def test_extract_phrases(test_text, expected_output):
    # quoted phrases must stay intact; empty and whitespace-only
    # values are discarded
    assert search_utils.extract_phrases(test_text) == expected_output
41
42
@pytest.mark.parametrize('test_text, text_phrases, expected_output', [
    ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
    ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
    ('irrelevant', ['not found'], []),
    ('irrelevant', ['not found'], []),
])
def test_get_matching_phrase_offsets(test_text, text_phrases, expected_output):
    # offsets are (start, end) pairs for every occurrence of every phrase
    assert search_utils.get_matching_phrase_offsets(
        test_text, text_phrases) == expected_output
52
53
@pytest.mark.parametrize('test_text, text_phrases, expected_output', [
    ('__RCSearchHLMarkBEG__some__RCSearchHLMarkEND__ text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(0, 46), (52, 98)]),
    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ there', [], [(0, 46), (47, 93)]),
    ('some text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(10, 56)]),
    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__there__RCSearchHLMarkEND__', [], [(0, 46), (47, 93), (94, 141)]),
    ('irrelevant', ['not found'], []),
    ('irrelevant', ['not found'], []),
])
def test_get_matching_marker_offsets(test_text, text_phrases, expected_output):
    # NOTE(review): `text_phrases` is parametrized but never passed to the
    # function — the default HL marker pattern is always used; presumably
    # intentional copy of the sibling test's table, verify.
    assert search_utils.get_matching_markers_offsets(test_text) == expected_output
65
66
def test_normalize_text_for_matching():
    # each non-alphanumeric character maps to one space and case is folded
    # NOTE(review): the expected literal may have lost repeated spaces in
    # transit — punctuation runs should produce equal-length space runs;
    # verify against a live run.
    assert search_utils.normalize_text_for_matching(
        'OJjfe)*#$*@)$JF*)3r2f80h') == 'ojjfe jf 3r2f80h'
70
71
def test_get_matching_line_offsets():
    # term-based search: returns (total line count, {1-based line: offsets})
    words = '\n'.join([
        'words words words',
        'words words words',
        'some text some',
        'words words words',
        'words words words',
        'text here what'
    ])
    total_lines, matched_offsets = \
        search_utils.get_matching_line_offsets(words, terms='text')
    assert total_lines == 6
    assert matched_offsets == {3: [(5, 9)], 6: [(0, 4)]}
85
86
def test_get_matching_line_offsets_using_markers():
    # marker-based search: offsets cover the whole marked span,
    # markers included
    words = '\n'.join([
        'words words words',
        'words words words',
        'some __1__text__2__ some',
        'words words words',
        'words words words',
        '__1__text__2__ here what'
    ])
    total_lines, matched_offsets = \
        search_utils.get_matching_line_offsets(words, terms=None,
                                               markers=['__1__(.*?)__2__'])
    assert total_lines == 6
    assert matched_offsets == {3: [(5, 19)], 6: [(0, 14)]}
@@ -407,30 +407,75 b' self: super: {'
407 407 };
408 408 };
409 409 "elasticsearch" = super.buildPythonPackage {
410 name = "elasticsearch-2.3.0";
410 name = "elasticsearch-6.3.1";
411 411 doCheck = false;
412 412 propagatedBuildInputs = [
413 413 self."urllib3"
414 414 ];
415 415 src = fetchurl {
416 url = "https://files.pythonhosted.org/packages/10/35/5fd52c5f0b0ee405ed4b5195e8bce44c5e041787680dc7b94b8071cac600/elasticsearch-2.3.0.tar.gz";
417 sha256 = "10ad2dk73xsys9vajwsncibs69asa63w1hgwz6lz1prjpyi80c5y";
416 url = "https://files.pythonhosted.org/packages/9d/ce/c4664e8380e379a9402ecfbaf158e56396da90d520daba21cfa840e0eb71/elasticsearch-6.3.1.tar.gz";
417 sha256 = "12y93v0yn7a4xmf969239g8gb3l4cdkclfpbk1qc8hx5qkymrnma";
418 418 };
419 419 meta = {
420 420 license = [ pkgs.lib.licenses.asl20 ];
421 421 };
422 422 };
423 423 "elasticsearch-dsl" = super.buildPythonPackage {
424 name = "elasticsearch-dsl-2.2.0";
424 name = "elasticsearch-dsl-6.3.1";
425 425 doCheck = false;
426 426 propagatedBuildInputs = [
427 427 self."six"
428 428 self."python-dateutil"
429 429 self."elasticsearch"
430 self."ipaddress"
431 ];
432 src = fetchurl {
433 url = "https://files.pythonhosted.org/packages/4c/0d/1549f50c591db6bb4e66cbcc8d34a6e537c3d89aa426b167c244fd46420a/elasticsearch-dsl-6.3.1.tar.gz";
434 sha256 = "1gh8a0shqi105k325hgwb9avrpdjh0mc6mxwfg9ba7g6lssb702z";
435 };
436 meta = {
437 license = [ pkgs.lib.licenses.asl20 ];
438 };
439 };
440 "elasticsearch1" = super.buildPythonPackage {
441 name = "elasticsearch1-1.10.0";
442 doCheck = false;
443 propagatedBuildInputs = [
444 self."urllib3"
430 445 ];
431 446 src = fetchurl {
432 url = "https://files.pythonhosted.org/packages/66/2f/52a086968788e58461641570f45c3207a52d46ebbe9b77dc22b6a8ffda66/elasticsearch-dsl-2.2.0.tar.gz";
433 sha256 = "1g4kxzxsdwlsl2a9kscmx11pafgimhj7y8wrfksv8pgvpkfb9fwr";
447 url = "https://files.pythonhosted.org/packages/a6/eb/73e75f9681fa71e3157b8ee878534235d57f24ee64f0e77f8d995fb57076/elasticsearch1-1.10.0.tar.gz";
448 sha256 = "0g89444kd5zwql4vbvyrmi2m6l6dcj6ga98j4hqxyyyz6z20aki2";
449 };
450 meta = {
451 license = [ pkgs.lib.licenses.asl20 ];
452 };
453 };
454 "elasticsearch1-dsl" = super.buildPythonPackage {
455 name = "elasticsearch1-dsl-0.0.12";
456 doCheck = false;
457 propagatedBuildInputs = [
458 self."six"
459 self."python-dateutil"
460 self."elasticsearch1"
461 ];
462 src = fetchurl {
463 url = "https://files.pythonhosted.org/packages/eb/9d/785342775cb10eddc9b8d7457d618a423b4f0b89d8b2b2d1bc27190d71db/elasticsearch1-dsl-0.0.12.tar.gz";
464 sha256 = "0ig1ly39v93hba0z975wnhbmzwj28w6w1sqlr2g7cn5spp732bhk";
465 };
466 meta = {
467 license = [ pkgs.lib.licenses.asl20 ];
468 };
469 };
470 "elasticsearch2" = super.buildPythonPackage {
471 name = "elasticsearch2-2.5.0";
472 doCheck = false;
473 propagatedBuildInputs = [
474 self."urllib3"
475 ];
476 src = fetchurl {
477 url = "https://files.pythonhosted.org/packages/84/77/63cf63d4ba11d913b5278406f2a37b0712bec6fc85edfb6151a33eaeba25/elasticsearch2-2.5.0.tar.gz";
478 sha256 = "0ky0q16lbvz022yv6q3pix7aamf026p1y994537ccjf0p0dxnbxr";
434 479 };
435 480 meta = {
436 481 license = [ pkgs.lib.licenses.asl20 ];
@@ -818,11 +863,11 b' self: super: {'
818 863 };
819 864 };
820 865 "markupsafe" = super.buildPythonPackage {
821 name = "markupsafe-1.0";
866 name = "markupsafe-1.1.0";
822 867 doCheck = false;
823 868 src = fetchurl {
824 url = "https://files.pythonhosted.org/packages/4d/de/32d741db316d8fdb7680822dd37001ef7a448255de9699ab4bfcbdf4172b/MarkupSafe-1.0.tar.gz";
825 sha256 = "0rdn1s8x9ni7ss8rfiacj7x1085lx8mh2zdwqslnw8xc3l4nkgm6";
869 url = "https://files.pythonhosted.org/packages/ac/7e/1b4c2e05809a4414ebce0892fe1e32c14ace86ca7d50c70f00979ca9b3a3/MarkupSafe-1.1.0.tar.gz";
870 sha256 = "1lxirjypbdd3l9jl4vliilhfnhy7c7f2vlldqg1b0i74khn375sf";
826 871 };
827 872 meta = {
828 873 license = [ pkgs.lib.licenses.bsdOriginal ];
@@ -1271,11 +1316,11 b' self: super: {'
1271 1316 };
1272 1317 };
1273 1318 "pyparsing" = super.buildPythonPackage {
1274 name = "pyparsing-1.5.7";
1319 name = "pyparsing-2.3.0";
1275 1320 doCheck = false;
1276 1321 src = fetchurl {
1277 url = "https://files.pythonhosted.org/packages/6f/2c/47457771c02a8ff0f302b695e094ec309e30452232bd79198ee94fda689f/pyparsing-1.5.7.tar.gz";
1278 sha256 = "17z7ws076z977sclj628fvwrp8y9j2rvdjcsq42v129n1gwi8vk4";
1322 url = "https://files.pythonhosted.org/packages/d0/09/3e6a5eeb6e04467b737d55f8bba15247ac0876f98fae659e58cd744430c6/pyparsing-2.3.0.tar.gz";
1323 sha256 = "14k5v7n3xqw8kzf42x06bzp184spnlkya2dpjyflax6l3yrallzk";
1279 1324 };
1280 1325 meta = {
1281 1326 license = [ pkgs.lib.licenses.mit ];
@@ -1642,7 +1687,7 b' self: super: {'
1642 1687 };
1643 1688 };
1644 1689 "rhodecode-enterprise-ce" = super.buildPythonPackage {
1645 name = "rhodecode-enterprise-ce-4.15.0";
1690 name = "rhodecode-enterprise-ce-4.16.0";
1646 1691 buildInputs = [
1647 1692 self."pytest"
1648 1693 self."py"
@@ -1788,7 +1833,7 b' self: super: {'
1788 1833 };
1789 1834 };
1790 1835 "rhodecode-tools" = super.buildPythonPackage {
1791 name = "rhodecode-tools-1.0.1";
1836 name = "rhodecode-tools-1.1.0";
1792 1837 doCheck = false;
1793 1838 propagatedBuildInputs = [
1794 1839 self."click"
@@ -1797,14 +1842,16 b' self: super: {'
1797 1842 self."mako"
1798 1843 self."markupsafe"
1799 1844 self."requests"
1800 self."elasticsearch"
1801 self."elasticsearch-dsl"
1802 1845 self."urllib3"
1803 1846 self."whoosh"
1847 self."elasticsearch"
1848 self."elasticsearch-dsl"
1849 self."elasticsearch2"
1850 self."elasticsearch1-dsl"
1804 1851 ];
1805 1852 src = fetchurl {
1806 url = "https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.0.1.tar.gz?md5=ffb5d6bcb855305b93cfe23ad42e500b";
1807 sha256 = "0nr300s4sg685qs4wgbwlplwriawrwi6jq79z37frcnpyc89gpvm";
1853 url = "https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.1.0.tar.gz?md5=cc320c277cb2add546220290ac9be626";
1854 sha256 = "1wbnnfrzyp0d4ys55vj5vnfrzfhwlqgdhc8yv8i6kwinizf8hfrn";
1808 1855 };
1809 1856 meta = {
1810 1857 license = [ { fullName = "Apache 2.0 and Proprietary"; } ];
@@ -1848,11 +1895,11 b' self: super: {'
1848 1895 };
1849 1896 };
1850 1897 "setuptools" = super.buildPythonPackage {
1851 name = "setuptools-40.6.2";
1898 name = "setuptools-40.6.3";
1852 1899 doCheck = false;
1853 1900 src = fetchurl {
1854 url = "https://files.pythonhosted.org/packages/b0/d1/8acb42f391cba52e35b131e442e80deffbb8d0676b93261d761b1f0ef8fb/setuptools-40.6.2.zip";
1855 sha256 = "0r2c5hapirlzm34h7pl1lgkm6gk7bcrlrdj28qgsvaqg3f74vfw6";
1901 url = "https://files.pythonhosted.org/packages/37/1b/b25507861991beeade31473868463dad0e58b1978c209de27384ae541b0b/setuptools-40.6.3.zip";
1902 sha256 = "1y085dnk574sxw9aymdng9gijvrsbw86hsv9hqnhv7y4d6nlsirv";
1856 1903 };
1857 1904 meta = {
1858 1905 license = [ pkgs.lib.licenses.mit ];
@@ -2043,11 +2090,11 b' self: super: {'
2043 2090 };
2044 2091 };
2045 2092 "urllib3" = super.buildPythonPackage {
2046 name = "urllib3-1.21";
2093 name = "urllib3-1.24.1";
2047 2094 doCheck = false;
2048 2095 src = fetchurl {
2049 url = "https://files.pythonhosted.org/packages/34/95/7b28259d0006ed681c424cd71a668363265eac92b67dddd018eb9a22bff8/urllib3-1.21.tar.gz";
2050 sha256 = "0irnj4wvh2y36s4q3l2vas9qr9m766w6w418nb490j3mf8a8zw6h";
2096 url = "https://files.pythonhosted.org/packages/b1/53/37d82ab391393565f2f831b8eedbffd57db5a718216f82f1a8b4d381a1c1/urllib3-1.24.1.tar.gz";
2097 sha256 = "08lwd9f3hqznyf32vnzwvp87pchx062nkbgyrf67rwlkgj0jk5fy";
2051 2098 };
2052 2099 meta = {
2053 2100 license = [ pkgs.lib.licenses.mit ];
@@ -36,7 +36,7 b' kombu==4.2.0'
36 36 lxml==4.2.5
37 37 mako==1.0.7
38 38 markdown==2.6.11
39 markupsafe==1.0.0
39 markupsafe==1.1.0
40 40 msgpack-python==0.5.6
41 41 pyotp==2.2.7
42 42 packaging==15.2
@@ -51,7 +51,7 b' pycrypto==2.6.1'
51 51 pycurl==7.43.0.2
52 52 pyflakes==0.8.1
53 53 pygments==2.3.0
54 pyparsing==1.5.7
54 pyparsing==2.3.0
55 55 pyramid-beaker==0.8
56 56 pyramid-debugtoolbar==4.4.0
57 57 pyramid-jinja2==2.7
@@ -79,7 +79,7 b' subprocess32==3.5.2'
79 79 supervisor==3.3.4
80 80 tempita==0.5.2
81 81 translationstring==1.3
82 urllib3==1.21
82 urllib3==1.24.1
83 83 urlobject==2.4.3
84 84 venusian==1.1.0
85 85 weberror==0.10.3
@@ -123,7 +123,7 b' ipdb==0.11.0'
123 123 ipython==5.1.0
124 124
125 125 ## rhodecode-tools, special case
126 https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.0.1.tar.gz?md5=ffb5d6bcb855305b93cfe23ad42e500b#egg=rhodecode-tools==1.0.1
126 https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.1.0.tar.gz?md5=cc320c277cb2add546220290ac9be626#egg=rhodecode-tools==1.1.0
127 127
128 128 ## appenlight
129 129 appenlight-client==0.6.26
@@ -666,8 +666,8 b' class AdminSettingsView(BaseAppView):'
666 666 c = self.load_default_context()
667 667 c.active = 'search'
668 668
669 searcher = searcher_from_config(self.request.registry.settings)
670 c.statistics = searcher.statistics(self.request.translate)
669 c.searcher = searcher_from_config(self.request.registry.settings)
670 c.statistics = c.searcher.statistics(self.request.translate)
671 671
672 672 return self._get_template_context(c)
673 673
@@ -246,9 +246,9 b' class HomeView(BaseAppView):'
246 246 }
247 247 for obj in acl_iter]
248 248
249 def _get_hash_commit_list(self, auth_user, query):
249 def _get_hash_commit_list(self, auth_user, searcher, query):
250 250 org_query = query
251 if not query or len(query) < 3:
251 if not query or len(query) < 3 or not searcher:
252 252 return []
253 253
254 254 commit_hashes = re.compile('(?:commit:)([0-9a-f]{2,40})').findall(query)
@@ -257,9 +257,8 b' class HomeView(BaseAppView):'
257 257 return []
258 258 commit_hash = commit_hashes[0]
259 259
260 searcher = searcher_from_config(self.request.registry.settings)
261 260 result = searcher.search(
262 'commit_id:%s*' % commit_hash, 'commit', auth_user,
261 'commit_id:{}*'.format(commit_hash), 'commit', auth_user,
263 262 raise_on_exc=False)
264 263
265 264 return [
@@ -303,6 +302,84 b' class HomeView(BaseAppView):'
303 302 }
304 303 return data
305 304
    def _get_default_search_queries(self, search_context, searcher, query):
        """
        Build the default "search for `query` ..." suggestion entries for
        the goto-switcher, contextualized to the current repository or
        repository group when the ES6 backend supports scoped queries.

        :param search_context: mapping of ``search_context[...]`` GET params
        :param searcher: search backend; ``None`` disables suggestions
        :param query: raw user query string
        :return: list of suggestion dicts (id, value, value_display,
            type, url)
        """
        if not searcher:
            return []
        is_es_6 = searcher.is_es_6

        queries = []
        # NOTE(review): repo_context is assigned below but never used in
        # this method — presumably reserved for later use; verify.
        repo_group_name, repo_name, repo_context = None, None, None

        # repo group context
        if search_context.get('search_context[repo_group_name]'):
            repo_group_name = search_context.get('search_context[repo_group_name]')
        if search_context.get('search_context[repo_name]'):
            repo_name = search_context.get('search_context[repo_name]')
            repo_context = search_context.get('search_context[repo_view_type]')

        if is_es_6 and repo_name:
            # scoped file-content search within the current repository
            def query_modifier():
                qry = '{} repo_name.raw:{} '.format(
                    query, searcher.escape_specials(repo_name))
                return {'q': qry, 'type': 'content'}
            label = u'Search for `{}` through files in this repository.'.format(query)
            queries.append(
                {
                    'id': -10,
                    'value': query,
                    'value_display': label,
                    'type': 'search',
                    'url': h.route_path(
                        'search_repo', repo_name=repo_name, _query=query_modifier())
                }
            )

            # scoped commit search within the current repository
            # NOTE(review): both entries share id -10 — confirm ids need
            # not be unique for the suggestion widget.
            def query_modifier():
                qry = '{} repo_name.raw:{} '.format(
                    query, searcher.escape_specials(repo_name))
                return {'q': qry, 'type': 'commit'}
            label = u'Search for `{}` through commits in this repository.'.format(query)
            queries.append(
                {
                    'id': -10,
                    'value': query,
                    'value_display': label,
                    'type': 'search',
                    'url': h.route_path(
                        'search_repo', repo_name=repo_name, _query=query_modifier())
                }
            )

        elif is_es_6 and repo_group_name:
            # scoped file-content search across all repos in the group
            def query_modifier():
                qry = '{} repo_name.raw:{} '.format(
                    query, searcher.escape_specials(repo_group_name + '/*'))
                return {'q': qry, 'type': 'content'}
            label = u'Search for `{}` through files in this repository group'.format(query)
            queries.append(
                {
                    'id': -20,
                    'value': query,
                    'value_display': label,
                    'type': 'search',
                    'url': h.route_path('search', _query=query_modifier())
                }
            )

        if not queries:
            # fallback: plain global content search
            queries.append(
                {
                    'id': -1,
                    'value': query,
                    'value_display': u'Search for: `{}`'.format(query),
                    'type': 'search',
                    'url': h.route_path('search',
                                        _query={'q': query, 'type': 'content'})
                }
            )

        return queries
382
306 383 @LoginRequired()
307 384 @view_config(
308 385 route_name='goto_switcher_data', request_method='GET',
@@ -315,26 +392,21 b' class HomeView(BaseAppView):'
315 392 query = self.request.GET.get('query')
316 393 log.debug('generating main filter data, query %s', query)
317 394
318 default_search_val = u'Full text search for: `{}`'.format(query)
319 395 res = []
320 396 if not query:
321 397 return {'suggestions': res}
322 398
323 res.append({
324 'id': -1,
325 'value': query,
326 'value_display': default_search_val,
327 'type': 'search',
328 'url': h.route_path(
329 'search', _query={'q': query})
330 })
331 repo_group_id = safe_int(self.request.GET.get('repo_group_id'))
399 searcher = searcher_from_config(self.request.registry.settings)
400 for _q in self._get_default_search_queries(self.request.GET, searcher, query):
401 res.append(_q)
402
403 repo_group_id = safe_int(self.request.GET.get('search_context[repo_group_id]'))
332 404 if repo_group_id:
333 405 repo_group = RepoGroup.get(repo_group_id)
334 406 composed_hint = '{}/{}'.format(repo_group.group_name, query)
335 407 show_hint = not query.startswith(repo_group.group_name)
336 408 if repo_group and show_hint:
337 hint = u'Group search: `{}`'.format(composed_hint)
409 hint = u'Repository search inside: `{}`'.format(composed_hint)
338 410 res.append({
339 411 'id': -1,
340 412 'value': composed_hint,
@@ -351,7 +423,7 b' class HomeView(BaseAppView):'
351 423 for serialized_repo in repos:
352 424 res.append(serialized_repo)
353 425
354 # TODO(marcink): permissions for that ?
426 # TODO(marcink): should all logged in users be allowed to search others?
355 427 allowed_user_search = self._rhodecode_user.username != User.DEFAULT_USER
356 428 if allowed_user_search:
357 429 users = self._get_user_list(query)
@@ -362,7 +434,7 b' class HomeView(BaseAppView):'
362 434 for serialized_user_group in user_groups:
363 435 res.append(serialized_user_group)
364 436
365 commits = self._get_hash_commit_list(c.auth_user, query)
437 commits = self._get_hash_commit_list(c.auth_user, searcher, query)
366 438 if commits:
367 439 unique_repos = collections.OrderedDict()
368 440 for commit in commits:
@@ -45,11 +45,14 b' def search(request, tmpl_context, repo_n'
45 45 errors = []
46 46 try:
47 47 search_params = schema.deserialize(
48 dict(search_query=request.GET.get('q'),
48 dict(
49 search_query=request.GET.get('q'),
49 50 search_type=request.GET.get('type'),
50 51 search_sort=request.GET.get('sort'),
52 search_max_lines=request.GET.get('max_lines'),
51 53 page_limit=request.GET.get('page_limit'),
52 requested_page=request.GET.get('page'))
54 requested_page=request.GET.get('page'),
55 )
53 56 )
54 57 except validation_schema.Invalid as e:
55 58 errors = e.children
@@ -57,12 +60,13 b' def search(request, tmpl_context, repo_n'
57 60 def url_generator(**kw):
58 61 q = urllib.quote(safe_str(search_query))
59 62 return update_params(
60 "?q=%s&type=%s" % (q, safe_str(search_type)), **kw)
63 "?q=%s&type=%s&max_lines=%s" % (q, safe_str(search_type), search_max_lines), **kw)
61 64
62 65 c = tmpl_context
63 66 search_query = search_params.get('search_query')
64 67 search_type = search_params.get('search_type')
65 68 search_sort = search_params.get('search_sort')
69 search_max_lines = search_params.get('search_max_lines')
66 70 if search_params.get('search_query'):
67 71 page_limit = search_params['page_limit']
68 72 requested_page = search_params['requested_page']
@@ -48,7 +48,6 b' import bleach'
48 48 from datetime import datetime
49 49 from functools import partial
50 50 from pygments.formatters.html import HtmlFormatter
51 from pygments import highlight as code_highlight
52 51 from pygments.lexers import (
53 52 get_lexer_by_name, get_lexer_for_filename, get_lexer_for_mimetype)
54 53
@@ -81,12 +80,14 b' from rhodecode.lib.utils2 import str2boo'
81 80 from rhodecode.lib.markup_renderer import MarkupRenderer, relative_links
82 81 from rhodecode.lib.vcs.exceptions import CommitDoesNotExistError
83 82 from rhodecode.lib.vcs.backends.base import BaseChangeset, EmptyCommit
83 from rhodecode.lib.index.search_utils import get_matching_line_offsets
84 84 from rhodecode.config.conf import DATE_FORMAT, DATETIME_FORMAT
85 85 from rhodecode.model.changeset_status import ChangesetStatusModel
86 86 from rhodecode.model.db import Permission, User, Repository
87 87 from rhodecode.model.repo_group import RepoGroupModel
88 88 from rhodecode.model.settings import IssueTrackerSettingsModel
89 89
90
90 91 log = logging.getLogger(__name__)
91 92
92 93
@@ -260,6 +261,21 b' def files_breadcrumbs(repo_name, commit_'
260 261 return literal('/'.join(url_segments))
261 262
262 263
def code_highlight(code, lexer, formatter, use_hl_filter=False):
    """
    Lex ``code`` with ``lexer`` and format it with the formatter ``formatter``.

    :param use_hl_filter: when True, attach the ElasticSearch highlight
        filter so search-match markers embedded in ``code`` are rendered
        as highlighted tokens.
    :return: the formatted output as a string
    """
    if use_hl_filter:
        # add HL filter
        from rhodecode.lib.index import search_utils
        lexer.add_filter(search_utils.ElasticSearchHLFilter())
    return pygments.format(pygments.lex(code, lexer), formatter)
278
263 279 class CodeHtmlFormatter(HtmlFormatter):
264 280 """
265 281 My code Html Formatter for source codes
@@ -386,110 +402,9 b' class SearchContentCodeHtmlFormatter(Cod'
386 402
387 403 current_line_number += 1
388 404
389
390 405 yield 0, '</table>'
391 406
392 407
393 def extract_phrases(text_query):
394 """
395 Extracts phrases from search term string making sure phrases
396 contained in double quotes are kept together - and discarding empty values
397 or fully whitespace values eg.
398
399 'some text "a phrase" more' => ['some', 'text', 'a phrase', 'more']
400
401 """
402
403 in_phrase = False
404 buf = ''
405 phrases = []
406 for char in text_query:
407 if in_phrase:
408 if char == '"': # end phrase
409 phrases.append(buf)
410 buf = ''
411 in_phrase = False
412 continue
413 else:
414 buf += char
415 continue
416 else:
417 if char == '"': # start phrase
418 in_phrase = True
419 phrases.append(buf)
420 buf = ''
421 continue
422 elif char == ' ':
423 phrases.append(buf)
424 buf = ''
425 continue
426 else:
427 buf += char
428
429 phrases.append(buf)
430 phrases = [phrase.strip() for phrase in phrases if phrase.strip()]
431 return phrases
432
433
434 def get_matching_offsets(text, phrases):
435 """
436 Returns a list of string offsets in `text` that the list of `terms` match
437
438 >>> get_matching_offsets('some text here', ['some', 'here'])
439 [(0, 4), (10, 14)]
440
441 """
442 offsets = []
443 for phrase in phrases:
444 for match in re.finditer(phrase, text):
445 offsets.append((match.start(), match.end()))
446
447 return offsets
448
449
450 def normalize_text_for_matching(x):
451 """
452 Replaces all non alnum characters to spaces and lower cases the string,
453 useful for comparing two text strings without punctuation
454 """
455 return re.sub(r'[^\w]', ' ', x.lower())
456
457
458 def get_matching_line_offsets(lines, terms):
459 """ Return a set of `lines` indices (starting from 1) matching a
460 text search query, along with `context` lines above/below matching lines
461
462 :param lines: list of strings representing lines
463 :param terms: search term string to match in lines eg. 'some text'
464 :param context: number of lines above/below a matching line to add to result
465 :param max_lines: cut off for lines of interest
466 eg.
467
468 text = '''
469 words words words
470 words words words
471 some text some
472 words words words
473 words words words
474 text here what
475 '''
476 get_matching_line_offsets(text, 'text', context=1)
477 {3: [(5, 9)], 6: [(0, 4)]]
478
479 """
480 matching_lines = {}
481 phrases = [normalize_text_for_matching(phrase)
482 for phrase in extract_phrases(terms)]
483
484 for line_index, line in enumerate(lines, start=1):
485 match_offsets = get_matching_offsets(
486 normalize_text_for_matching(line), phrases)
487 if match_offsets:
488 matching_lines[line_index] = match_offsets
489
490 return matching_lines
491
492
493 408 def hsv_to_rgb(h, s, v):
494 409 """ Convert hsv color values to rgb """
495 410
@@ -1904,25 +1819,6 b' def journal_filter_help(request):'
1904 1819 ).format(actions=actions)
1905 1820
1906 1821
1907 def search_filter_help(searcher, request):
1908 _ = request.translate
1909
1910 terms = ''
1911 return _(
1912 'Example filter terms for `{searcher}` search:\n' +
1913 '{terms}\n' +
1914 'Generate wildcards using \'*\' character:\n' +
1915 ' "repo_name:vcs*" - search everything starting with \'vcs\'\n' +
1916 ' "repo_name:*vcs*" - search for repository containing \'vcs\'\n' +
1917 '\n' +
1918 'Optional AND / OR operators in queries\n' +
1919 ' "repo_name:vcs OR repo_name:test"\n' +
1920 ' "owner:test AND repo_name:test*"\n' +
1921 'More: {search_doc}'
1922 ).format(searcher=searcher.name,
1923 terms=terms, search_doc=searcher.query_lang_doc)
1924
1925
1926 1822 def not_mapped_error(repo_name):
1927 1823 from rhodecode.translation import _
1928 1824 flash(_('%s repository is not mapped to db perhaps'
@@ -2107,3 +2003,15 b' def go_import_header(request, db_repo=No'
2107 2003 def reviewer_as_json(*args, **kwargs):
2108 2004 from rhodecode.apps.repository.utils import reviewer_as_json as _reviewer_as_json
2109 2005 return _reviewer_as_json(*args, **kwargs)
2006
2007
2008 def get_repo_view_type(request):
2009 route_name = request.matched_route.name
2010 route_to_view_type = {
2011 'repo_changelog': 'changelog',
2012 'repo_files': 'files',
2013 'repo_summary': 'summary',
2014 'repo_commit': 'commit'
2015
2016 }
2017 return route_to_view_type.get(route_name)
@@ -25,15 +25,27 b' Index schema for RhodeCode'
25 25 import importlib
26 26 import logging
27 27
28 from rhodecode.lib.index.search_utils import normalize_text_for_matching
29
28 30 log = logging.getLogger(__name__)
29 31
30 32 # leave defaults for backward compat
31 33 default_searcher = 'rhodecode.lib.index.whoosh'
32 34 default_location = '%(here)s/data/index'
33 35
36 ES_VERSION_2 = '2'
37 ES_VERSION_6 = '6'
38 # for legacy reasons we keep 2 compat as default
39 DEFAULT_ES_VERSION = ES_VERSION_2
34 40
35 class BaseSearch(object):
41 from rhodecode_tools.lib.fts_index.elasticsearch_engine_6 import \
42 ES_CONFIG # pragma: no cover
43
44
45 class BaseSearcher(object):
36 46 query_lang_doc = ''
47 es_version = None
48 name = None
37 49
38 50 def __init__(self):
39 51 pass
@@ -45,15 +57,42 b' class BaseSearch(object):'
45 57 raise_on_exc=True):
46 58 raise Exception('NotImplemented')
47 59
60 @staticmethod
61 def query_to_mark(query, default_field=None):
62 """
63 Formats the query to mark token for jquery.mark.js highlighting. ES could
64 have a different format optionally.
48 65
49 def searcher_from_config(config, prefix='search.'):
66 :param default_field:
67 :param query:
68 """
69 return ' '.join(normalize_text_for_matching(query).split())
70
71 @property
72 def is_es_6(self):
73 return self.es_version == ES_VERSION_6
74
75 def get_handlers(self):
76 return {}
77
78
79 def search_config(config, prefix='search.'):
50 80 _config = {}
51 81 for key in config.keys():
52 82 if key.startswith(prefix):
53 83 _config[key[len(prefix):]] = config[key]
84 return _config
85
86
87 def searcher_from_config(config, prefix='search.'):
88 _config = search_config(config, prefix)
54 89
55 90 if 'location' not in _config:
56 91 _config['location'] = default_location
92 if 'es_version' not in _config:
93 # use old legacy ES version set to 2
94 _config['es_version'] = '2'
95
57 96 imported = importlib.import_module(_config.get('module', default_searcher))
58 searcher = imported.Search(config=_config)
97 searcher = imported.Searcher(config=_config)
59 98 return searcher
@@ -33,7 +33,7 b' from whoosh.index import create_in, open'
33 33 from whoosh.qparser import QueryParser, QueryParserError
34 34
35 35 import rhodecode.lib.helpers as h
36 from rhodecode.lib.index import BaseSearch
36 from rhodecode.lib.index import BaseSearcher
37 37 from rhodecode.lib.utils2 import safe_unicode
38 38
39 39 log = logging.getLogger(__name__)
@@ -59,13 +59,13 b' FRAGMENTER = ContextFragmenter(200)'
59 59 log = logging.getLogger(__name__)
60 60
61 61
62 class Search(BaseSearch):
62 class WhooshSearcher(BaseSearcher):
63 63 # this also shows in UI
64 64 query_lang_doc = 'http://whoosh.readthedocs.io/en/latest/querylang.html'
65 65 name = 'whoosh'
66 66
67 67 def __init__(self, config):
68 super(Search, self).__init__()
68 super(Searcher, self).__init__()
69 69 self.config = config
70 70 if not os.path.isdir(self.config['location']):
71 71 os.makedirs(self.config['location'])
@@ -162,16 +162,17 b' class Search(BaseSearch):'
162 162 _ = translator
163 163 stats = [
164 164 {'key': _('Index Type'), 'value': 'Whoosh'},
165 {'sep': True},
166
165 167 {'key': _('File Index'), 'value': str(self.file_index)},
166 {'key': _('Indexed documents'),
167 'value': self.file_index.doc_count()},
168 {'key': _('Last update'),
169 'value': h.time_to_datetime(self.file_index.last_modified())},
168 {'key': _('Indexed documents'), 'value': self.file_index.doc_count()},
169 {'key': _('Last update'), 'value': h.time_to_datetime(self.file_index.last_modified())},
170
171 {'sep': True},
172
170 173 {'key': _('Commit index'), 'value': str(self.commit_index)},
171 {'key': _('Indexed documents'),
172 'value': str(self.commit_index.doc_count())},
173 {'key': _('Last update'),
174 'value': h.time_to_datetime(self.commit_index.last_modified())}
174 {'key': _('Indexed documents'), 'value': str(self.commit_index.doc_count())},
175 {'key': _('Last update'), 'value': h.time_to_datetime(self.commit_index.last_modified())}
175 176 ]
176 177 return stats
177 178
@@ -227,6 +228,9 b' class Search(BaseSearch):'
227 228 return self.searcher
228 229
229 230
231 Searcher = WhooshSearcher
232
233
230 234 class WhooshResultWrapper(object):
231 235 def __init__(self, search_type, total_hits, results):
232 236 self.search_type = search_type
@@ -263,6 +267,8 b' class WhooshResultWrapper(object):'
263 267 # TODO: marcink: this feels like an overkill, there's a lot of data
264 268 # inside hit object, and we don't need all
265 269 res = dict(hit)
270 # elastic search uses that, we set it empty so it fallbacks to regular HL logic
271 res['content_highlight'] = ''
266 272
267 273 f_path = '' # pragma: no cover
268 274 if self.search_type in ['content', 'path']:
@@ -1009,3 +1009,14 b' def glob2re(pat):'
1009 1009 else:
1010 1010 res = res + re.escape(c)
1011 1011 return res + '\Z(?ms)'
1012
1013
1014 def parse_byte_string(size_str):
1015 match = re.match(r'(\d+)(MB|KB)', size_str, re.IGNORECASE)
1016 if not match:
1017 raise ValueError('Given size:%s is invalid, please make sure '
1018 'to use format of <num>(MB|KB)' % size_str)
1019
1020 _parts = match.groups()
1021 num, type_ = _parts
1022 return long(num) * {'mb': 1024*1024, 'kb': 1024}[type_.lower()]
@@ -58,7 +58,7 b' def author_name(author):'
58 58 to get the username
59 59 """
60 60
61 if not author or not '@' in author:
61 if not author or '@' not in author:
62 62 return author
63 63 else:
64 64 return author.replace(author_email(author), '').replace('<', '')\
@@ -34,6 +34,9 b' class SearchParamsSchema(colander.Mappin'
34 34 colander.String(),
35 35 missing='newfirst',
36 36 validator=colander.OneOf(['oldfirst', 'newfirst']))
37 search_max_lines = colander.SchemaNode(
38 colander.Integer(),
39 missing=10)
37 40 page_limit = colander.SchemaNode(
38 41 colander.Integer(),
39 42 missing=10,
@@ -572,6 +572,7 b' div.annotatediv { margin-left: 2px; marg'
572 572 .code-highlight, /* TODO: dan: merge codehilite into code-highlight */
573 573 /* This can be generated with `pygmentize -S default -f html` */
574 574 .codehilite {
575 .c-ElasticMatch { background-color: #faffa6; padding: 0.2em;}
575 576 .hll { background-color: #ffffcc }
576 577 .c { color: #408080; font-style: italic } /* Comment */
577 578 .err, .codehilite .err { border: none } /* Error */
@@ -640,6 +641,7 b' div.annotatediv { margin-left: 2px; marg'
640 641 .vi { color: #19177C } /* Name.Variable.Instance */
641 642 .vm { color: #19177C } /* Name.Variable.Magic */
642 643 .il { color: #666666 } /* Literal.Number.Integer.Long */
644
643 645 }
644 646
645 647 /* customized pre blocks for markdown/rst */
@@ -166,7 +166,6 b' small,'
166 166
167 167 mark,
168 168 .mark {
169 background-color: @rclightblue;
170 169 padding: .2em;
171 170 }
172 171
@@ -5,8 +5,13 b''
5 5 <div class="panel-body">
6 6 <dl class="dl-horizontal">
7 7 % for stat in c.statistics:
8 % if stat.get('sep'):
9 <dt></dt>
10 <dd>--</dd>
11 % else:
8 12 <dt>${stat['key']}</dt>
9 13 <dd>${stat['value']}</dd>
14 % endif
10 15 % endfor
11 16 </dl>
12 17 </div>
@@ -7,9 +7,12 b" go_import_header = ''"
7 7 if hasattr(c, 'rhodecode_db_repo'):
8 8 c.template_context['repo_type'] = c.rhodecode_db_repo.repo_type
9 9 c.template_context['repo_landing_commit'] = c.rhodecode_db_repo.landing_rev[1]
10 ## check repo context
11 c.template_context['repo_view_type'] = h.get_repo_view_type(request)
10 12
11 13 if getattr(c, 'repo_group', None):
12 14 c.template_context['repo_group_id'] = c.repo_group.group_id
15 c.template_context['repo_group_name'] = c.repo_group.group_name
13 16
14 17 if getattr(c, 'rhodecode_user', None) and c.rhodecode_user.user_id:
15 18 c.template_context['rhodecode_user']['username'] = c.rhodecode_user.username
@@ -23,6 +26,12 b" c.template_context['default_user'] = {"
23 26 'username': h.DEFAULT_USER,
24 27 'user_id': 1
25 28 }
29 c.template_context['search_context'] = {
30 'repo_group_id': c.template_context.get('repo_group_id'),
31 'repo_group_name': c.template_context.get('repo_group_name'),
32 'repo_name': c.template_context.get('repo_name'),
33 'repo_view_type': c.template_context.get('repo_view_type'),
34 }
26 35
27 36 %>
28 37 <html xmlns="http://www.w3.org/1999/xhtml">
@@ -18,10 +18,7 b''
18 18 %else:
19 19 ${_('Search inside all accessible repositories')}
20 20 %endif
21 %if c.cur_query:
22 &raquo;
23 ${c.cur_query}
24 %endif
21
25 22 </%def>
26 23
27 24 <%def name="menu_bar_nav()">
@@ -59,7 +56,8 b''
59 56 <div class="fields">
60 57 ${h.text('q', c.cur_query, placeholder="Enter query...")}
61 58
62 ${h.select('type',c.search_type,[('content',_('File contents')), ('commit',_('Commit messages')), ('path',_('File names')),],id='id_search_type')}
59 ${h.select('type',c.search_type,[('content',_('Files')), ('path',_('File path')),('commit',_('Commits'))],id='id_search_type')}
60 ${h.hidden('max_lines', '10')}
63 61 <input type="submit" value="${_('Search')}" class="btn"/>
64 62 <br/>
65 63
@@ -72,8 +70,54 b''
72 70 </span>
73 71 % endfor
74 72 <div class="field">
75 <p class="filterexample" style="position: inherit" onclick="$('#search-help').toggle()">${_('Example Queries')}</p>
76 <pre id="search-help" style="display: none">${h.tooltip(h.search_filter_help(c.searcher, request))}</pre>
73 <p class="filterexample" style="position: inherit" onclick="$('#search-help').toggle()">${_('Query Langague examples')}</p>
74 <pre id="search-help" style="display: none">\
75
76 % if c.searcher.name == 'whoosh':
77 Example filter terms for `Whoosh` search:
78 query lang: <a href="${c.searcher.query_lang_doc}">Whoosh Query Language</a>
79 Whoosh has limited query capabilities. For advanced search use ElasticSearch 6 from RhodeCode EE edition.
80
81 Generate wildcards using '*' character:
82 "repo_name:vcs*" - search everything starting with 'vcs'
83 "repo_name:*vcs*" - search for repository containing 'vcs'
84
85 Optional AND / OR operators in queries
86 "repo_name:vcs OR repo_name:test"
87 "owner:test AND repo_name:test*" AND extension:py
88
89 Move advanced search is available via ElasticSearch6 backend in EE edition.
90 % elif c.searcher.name == 'elasticsearch' and c.searcher.es_version == '2':
91 Example filter terms for `ElasticSearch-${c.searcher.es_version}`search:
92 ElasticSearch-2 has limited query capabilities. For advanced search use ElasticSearch 6 from RhodeCode EE edition.
93
94 search type: content (File Content)
95 indexed fields: content
96
97 # search for `fix` string in all files
98 fix
99
100 search type: commit (Commit message)
101 indexed fields: message
102
103 search type: path (File name)
104 indexed fields: path
105
106 % else:
107 Example filter terms for `ElasticSearch-${c.searcher.es_version}`search:
108 query lang: <a href="${c.searcher.query_lang_doc}">ES 6 Query Language</a>
109 The reserved characters needed espace by `\`: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
110 % for handler in c.searcher.get_handlers().values():
111
112 search type: ${handler.search_type_label}
113 *indexed fields*: ${', '.join( [('\n ' if x[0]%4==0 else '')+x[1] for x in enumerate(handler.es_6_field_names)])}
114 % for entry in handler.es_6_example_queries:
115 ${entry.rstrip()}
116 % endfor
117 % endfor
118
119 % endif
120 </pre>
77 121 </div>
78 122
79 123 <div class="field">${c.runtime}</div>
@@ -96,6 +140,7 b''
96 140 </div>
97 141 <script>
98 142 $(document).ready(function(){
143 $('#q').autoGrowInput();
99 144 $("#id_search_type").select2({
100 145 'containerCssClass': "drop-menu",
101 146 'dropdownCssClass': "drop-menu-dropdown",
@@ -1,5 +1,7 b''
1 1 <%namespace name="base" file="/base/base.mako"/>
2 2
3 % if c.formatted_results:
4
3 5 <table class="rctable search-results">
4 6 <tr>
5 7 <th>${_('Repository')}</th>
@@ -50,14 +52,20 b''
50 52 </td>
51 53
52 54 <td class="td-user author">
53 ${base.gravatar_with_user(entry['author'])}
55 <%
56 ## es6 stores this as object
57 author = entry['author']
58 if isinstance(author, dict):
59 author = author['email']
60 %>
61 ${base.gravatar_with_user(author)}
54 62 </td>
55 63 </tr>
56 64 % endif
57 65 %endfor
58 66 </table>
59 67
60 %if c.cur_query and c.formatted_results:
68 %if c.cur_query:
61 69 <div class="pagination-wh pagination-left">
62 70 ${c.formatted_results.pager('$link_previous ~2~ $link_next')}
63 71 </div>
@@ -79,4 +87,16 b''
79 87 target_expand.addClass('open');
80 88 }
81 89 });
90
91 $(".message.td-description").mark(
92 "${c.searcher.query_to_mark(c.cur_query, 'message')}",
93 {
94 "className": 'match',
95 "accuracy": "complementary",
96 "ignorePunctuation": ":._(){}[]!'+=".split("")
97 }
98 );
99
82 100 </script>
101
102 % endif
@@ -1,33 +1,10 b''
1 <%def name="highlight_text_file(terms, text, url, line_context=3,
2 max_lines=10,
3 mimetype=None, filepath=None)">
4 <%
5 lines = text.split('\n')
6 lines_of_interest = set()
7 matching_lines = h.get_matching_line_offsets(lines, terms)
8 shown_matching_lines = 0
9 1
10 for line_number in matching_lines:
11 if len(lines_of_interest) < max_lines:
12 lines_of_interest |= set(range(
13 max(line_number - line_context, 0),
14 min(line_number + line_context, len(lines) + 1)))
15 shown_matching_lines += 1
16
17 %>
18 ${h.code_highlight(
19 text,
20 h.get_lexer_safe(
21 mimetype=mimetype,
22 filepath=filepath,
23 ),
24 h.SearchContentCodeHtmlFormatter(
25 linenos=True,
26 cssclass="code-highlight",
27 url=url,
28 query_terms=terms,
29 only_line_numbers=lines_of_interest
30 ))|n}
2 <%def name="highlight_text_file(has_matched_content, file_content, lexer, html_formatter, matching_lines, shown_matching_lines, url, use_hl_filter)">
3 % if has_matched_content:
4 ${h.code_highlight(file_content, lexer, html_formatter, use_hl_filter=use_hl_filter)|n}
5 % else:
6 ${_('No content matched')} <br/>
7 % endif
31 8
32 9 %if len(matching_lines) > shown_matching_lines:
33 10 <a href="${url}">
@@ -37,12 +14,52 b' for line_number in matching_lines:'
37 14 </%def>
38 15
39 16 <div class="search-results">
17 <% query_mark = c.searcher.query_to_mark(c.cur_query, 'content') %>
18
40 19 %for entry in c.formatted_results:
20
21 <%
22 file_content = entry['content_highlight'] or entry['content']
23 mimetype = entry.get('mimetype')
24 filepath = entry.get('path')
25 max_lines = h.safe_int(request.GET.get('max_lines', '10'))
26 line_context = h.safe_int(request.GET.get('line_contenxt', '3'))
27
28 match_file_url=h.route_path('repo_files',repo_name=entry['repository'], commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path'], _query={"mark": query_mark})
29 terms = c.cur_query
30
31 if c.searcher.is_es_6:
32 # use empty terms so we default to markers usage
33 total_lines, matching_lines = h.get_matching_line_offsets(file_content, terms=None)
34 else:
35 total_lines, matching_lines = h.get_matching_line_offsets(file_content, terms)
36
37 shown_matching_lines = 0
38 lines_of_interest = set()
39 for line_number in matching_lines:
40 if len(lines_of_interest) < max_lines:
41 lines_of_interest |= set(range(
42 max(line_number - line_context, 0),
43 min(line_number + line_context, total_lines + 1)))
44 shown_matching_lines += 1
45 lexer = h.get_lexer_safe(mimetype=mimetype, filepath=filepath)
46
47 html_formatter = h.SearchContentCodeHtmlFormatter(
48 linenos=True,
49 cssclass="code-highlight",
50 url=match_file_url,
51 query_terms=terms,
52 only_line_numbers=lines_of_interest
53 )
54
55 has_matched_content = len(lines_of_interest) >= 1
56
57 %>
41 58 ## search results are additionally filtered, and this check is just a safe gate
42 59 % if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(entry['repository'], 'search results content check'):
43 60 <div id="codeblock" class="codeblock">
44 61 <div class="codeblock-header">
45 <h2>
62 <h1>
46 63 %if h.get_repo_type_by_name(entry.get('repository')) == 'hg':
47 64 <i class="icon-hg"></i>
48 65 %elif h.get_repo_type_by_name(entry.get('repository')) == 'git':
@@ -51,18 +68,39 b' for line_number in matching_lines:'
51 68 <i class="icon-svn"></i>
52 69 %endif
53 70 ${h.link_to(entry['repository'], h.route_path('repo_summary',repo_name=entry['repository']))}
54 </h2>
71 </h1>
72
55 73 <div class="stats">
74 <span class="stats-filename">
75 <strong>
76 <i class="icon-file-text"></i>
56 77 ${h.link_to(h.literal(entry['f_path']), h.route_path('repo_files',repo_name=entry['repository'],commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path']))}
78 </strong>
79 </span>
80 <span class="item last"><i class="tooltip icon-clipboard clipboard-action" data-clipboard-text="${entry['f_path']}" title="${_('Copy the full path')}"></i></span>
81 <br/>
82 <span class="stats-first-item">
83 ${len(matching_lines)} ${_ungettext('search match', 'search matches', len(matching_lines))}
84 </span>
85
86 <span >
57 87 %if entry.get('lines'):
58 88 | ${entry.get('lines', 0.)} ${_ungettext('line', 'lines', entry.get('lines', 0.))}
59 89 %endif
90 </span>
91
92 <span>
60 93 %if entry.get('size'):
61 94 | ${h.format_byte_size_binary(entry['size'])}
62 95 %endif
96 </span>
97
98 <span>
63 99 %if entry.get('mimetype'):
64 100 | ${entry.get('mimetype', "unknown mimetype")}
65 101 %endif
102 </span>
103
66 104 </div>
67 105 <div class="buttons">
68 106 <a id="file_history_overview_full" href="${h.route_path('repo_changelog_file',repo_name=entry.get('repository',''),commit_id=entry.get('commit_id', 'tip'),f_path=entry.get('f_path',''))}">
@@ -74,10 +112,19 b' for line_number in matching_lines:'
74 112 </div>
75 113 </div>
76 114 <div class="code-body search-code-body">
77 ${highlight_text_file(c.cur_query, entry['content'],
78 url=h.route_path('repo_files',repo_name=entry['repository'],commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path']),
79 mimetype=entry.get('mimetype'), filepath=entry.get('path'))}
115
116 ${highlight_text_file(
117 has_matched_content=has_matched_content,
118 file_content=file_content,
119 lexer=lexer,
120 html_formatter=html_formatter,
121 matching_lines=matching_lines,
122 shown_matching_lines=shown_matching_lines,
123 url=match_file_url,
124 use_hl_filter=c.searcher.is_es_6
125 )}
80 126 </div>
127
81 128 </div>
82 129 % endif
83 130 %endfor
@@ -91,10 +138,14 b' for line_number in matching_lines:'
91 138 %if c.cur_query:
92 139 <script type="text/javascript">
93 140 $(function(){
94 $(".code").mark(
95 '${' '.join(h.normalize_text_for_matching(c.cur_query).split())}',
96 {"className": 'match',
97 });
141 $(".search-code-body").mark(
142 "${query_mark}",
143 {
144 "className": 'match',
145 "accuracy": "complementary",
146 "ignorePunctuation": ":._(){}[]!'+=".split("")
147 }
148 );
98 149 })
99 150 </script>
100 %endif No newline at end of file
151 %endif
@@ -1,3 +1,5 b''
1 % if c.formatted_results:
2
1 3 <table class="rctable search-results">
2 4 <tr>
3 5 <th>${_('Repository')}</th>
@@ -27,8 +29,10 b''
27 29 %endfor
28 30 </table>
29 31
30 %if c.cur_query and c.formatted_results:
32 %if c.cur_query:
31 33 <div class="pagination-wh pagination-left">
32 34 ${c.formatted_results.pager('$link_previous ~2~ $link_next')}
33 35 </div>
34 %endif No newline at end of file
36 %endif
37
38 % endif
@@ -208,44 +208,3 b' def test_get_visual_attr(baseapp):'
208 208 def test_chop_at(test_text, inclusive, expected_text):
209 209 assert helpers.chop_at_smart(
210 210 test_text, '\n', inclusive, '...') == expected_text
211
212
213 @pytest.mark.parametrize('test_text, expected_output', [
214 ('some text', ['some', 'text']),
215 ('some text', ['some', 'text']),
216 ('some text "with a phrase"', ['some', 'text', 'with a phrase']),
217 ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
218 ('"justphrase"', ['justphrase']),
219 ('""', []),
220 ('', []),
221 (' ', []),
222 ('" "', []),
223 ])
224 def test_extract_phrases(test_text, expected_output):
225 assert helpers.extract_phrases(test_text) == expected_output
226
227
228 @pytest.mark.parametrize('test_text, text_phrases, expected_output', [
229 ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
230 ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
231 ('irrelevant', ['not found'], []),
232 ('irrelevant', ['not found'], []),
233 ])
234 def test_get_matching_offsets(test_text, text_phrases, expected_output):
235 assert helpers.get_matching_offsets(
236 test_text, text_phrases) == expected_output
237
238
239 def test_normalize_text_for_matching():
240 assert helpers.normalize_text_for_matching(
241 'OJjfe)*#$*@)$JF*)3r2f80h') == 'ojjfe jf 3r2f80h'
242
243
244 def test_get_matching_line_offsets():
245 assert helpers.get_matching_line_offsets([
246 'words words words',
247 'words words words',
248 'some text some',
249 'words words words',
250 'words words words',
251 'text here what'], 'text') == {3: [(5, 9)], 6: [(0, 4)]}
General Comments 0
You need to be logged in to leave comments. Login now