##// END OF EJS Templates
search: add support for elastic search 6...
dan -
r3319:b8fd1d7a default
parent child
Show More
@@ -0,0 +1,257
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2012-2018 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 import re
21
22 import pygments.filter
23 import pygments.filters
24 from pygments.token import Comment
25
# Sentinel strings injected by the Elasticsearch highlighter around each
# matched fragment; chosen so they are very unlikely to occur in real content.
HL_BEG_MARKER = '__RCSearchHLMarkBEG__'
HL_END_MARKER = '__RCSearchHLMarkEND__'
# Regex matching one whole highlighted region, markers included (non-greedy
# so adjacent regions on the same line are matched separately).
HL_MARKER_RE = '{}(.*?){}'.format(HL_BEG_MARKER, HL_END_MARKER)
29
30
class ElasticSearchHLFilter(pygments.filters.Filter):
    """Pygments stream filter that turns Elasticsearch highlight markers
    embedded in token text into ``Comment.ElasticMatch`` tokens.

    Text between ``HL_BEG_MARKER`` and ``HL_END_MARKER`` is re-emitted with
    the ``Comment.ElasticMatch`` token type and the markers themselves are
    dropped. The highlight state carries across stream tokens, so a single
    highlighted region may span several lexer tokens.
    """
    _names = [HL_BEG_MARKER, HL_END_MARKER]

    def __init__(self, **options):
        pygments.filters.Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        beg_mark, end_mark = self._names
        # split pattern keeps the markers as separate pieces
        split_pat = '({}|{})'.format(beg_mark, end_mark)

        highlighting = False
        for ttype, value in stream:
            if beg_mark in value or end_mark in value:
                # re.split with a capturing group keeps the markers;
                # empty pieces between adjacent markers are skipped
                for piece in re.split(split_pat, value):
                    if not piece:
                        continue
                    if piece == beg_mark:
                        # drop the marker, start highlighting
                        highlighting = True
                    elif piece == end_mark:
                        # drop the marker, stop highlighting
                        highlighting = False
                    elif highlighting:
                        yield Comment.ElasticMatch, piece
                    else:
                        yield ttype, piece
            elif highlighting:
                yield Comment.ElasticMatch, value
            else:
                yield ttype, value
66
67
def extract_phrases(text_query):
    """
    Split a search term string into individual words and quoted phrases.

    Text inside double quotes is kept together as one phrase; every piece
    is stripped and empty or whitespace-only pieces are discarded, eg.

    'some text "a phrase" more' => ['some', 'text', 'a phrase', 'more']
    """
    pieces = []
    current = []  # characters of the piece being collected
    quoted = False

    for char in text_query:
        if quoted:
            if char == '"':
                # closing quote ends the phrase
                pieces.append(''.join(current))
                current = []
                quoted = False
            else:
                current.append(char)
        elif char == '"':
            # opening quote: flush whatever was collected so far
            pieces.append(''.join(current))
            current = []
            quoted = True
        elif char == ' ':
            # word boundary outside of a phrase
            pieces.append(''.join(current))
            current = []
        else:
            current.append(char)

    pieces.append(''.join(current))
    return [piece.strip() for piece in pieces if piece.strip()]
107
108
def get_matching_phrase_offsets(text, phrases):
    """
    Return a list of (start, end) offsets in `text` where any of the given
    `phrases` occur. Each phrase is matched literally; offsets are grouped
    by phrase in the order the phrases are given.

    :param text: string to scan
    :param phrases: list of phrase strings to look for (may be None/empty)

    >>> get_matching_phrase_offsets('some text here', ['some', 'here'])
    [(0, 4), (10, 14)]
    """
    phrases = phrases or []
    offsets = []

    for phrase in phrases:
        # escape the phrase so regex metacharacters (eg. '+', '*', '(')
        # are matched literally instead of being interpreted as a pattern,
        # which previously could raise re.error or silently mismatch
        for match in re.finditer(re.escape(phrase), text):
            offsets.append((match.start(), match.end()))

    return offsets
125
126
def get_matching_markers_offsets(text, markers=None):
    r"""
    Return a list of (start, end) offsets in `text` spanning the regions
    matched by the given marker regexes; the markers themselves are part
    of each reported span.

    :param text: string to scan
    :param markers: list of regex patterns marking highlighted regions;
        defaults to the module-level ``HL_MARKER_RE`` pattern

    >>> get_matching_markers_offsets('$1some$2 text $1here$2 marked', [r'\$1(.*?)\$2'])
    [(0, 8), (14, 22)]
    """
    # default to the Elasticsearch highlight marker pattern; `markers` is
    # always truthy after this, so no extra `if markers:` guard is needed
    markers = markers or [HL_MARKER_RE]
    offsets = []

    for mark in markers:
        for match in re.finditer(mark, text):
            offsets.append((match.start(), match.end()))

    return offsets
144
145
def normalize_text_for_matching(x):
    """
    Return `x` lower-cased with every non-alphanumeric character replaced
    by a single space; useful for comparing two strings while ignoring
    punctuation. Note that underscore counts as a word character.
    """
    # \W is equivalent to [^\w]: anything that is not [a-zA-Z0-9_]
    return re.sub(r'\W', ' ', x.lower())
152
153
def get_matching_line_offsets(lines, terms=None, markers=None):
    """Find lines in a newline-separated text blob that match a search.

    :param lines: newline separated string of lines to scan
    :param terms: search term string to match in lines eg. 'some text';
        words and double-quoted phrases are matched after punctuation
        insensitive normalization
    :param markers: used instead of `terms`: list of regexes that mark the
        beginning and end of matched regions, eg. ['START(.*?)END']

    Returns a tuple ``(total_lines, matching_lines)`` where
    ``matching_lines`` maps 1-based line numbers to lists of
    (start, end) character offsets within that line, eg.

        get_matching_line_offsets(text, 'text')
        => 6, {3: [(5, 9)], 6: [(0, 4)]}
    """
    matching_lines = {}
    line_index = 0  # stays 0 when `lines` is empty

    if terms:
        # normalize the query the same way each line is normalized below
        phrases = [
            normalize_text_for_matching(phrase)
            for phrase in extract_phrases(terms)
        ]
        for line_index, line in enumerate(lines.splitlines(), start=1):
            found = get_matching_phrase_offsets(
                normalize_text_for_matching(line), phrases)
            if found:
                matching_lines[line_index] = found
    else:
        # marker-based search, eg. Elasticsearch pre-highlighted content
        for line_index, line in enumerate(lines.splitlines(), start=1):
            found = get_matching_markers_offsets(
                line, markers=markers or [HL_MARKER_RE])
            if found:
                matching_lines[line_index] = found

    return line_index, matching_lines
198
199
def lucene_query_parser():
    """
    Build and return a pyparsing grammar for Lucene query syntax.

    Adapted from the ``lucene_grammar`` example shipped with pyparsing.
    The returned ``expression`` parser element understands bare words and
    quoted phrases, fielded terms (``field:value``), inclusive/exclusive
    range searches, fuzzy (``~``) and proximity modifiers, boost factors
    (``^``) and boolean operators AND/OR/NOT with ``&&``/``||``/``!``
    aliases plus required (``+``) / prohibited (``-``) term modifiers.
    """
    # from pyparsing lucene_grammar
    from pyparsing import (
        Literal, CaselessKeyword, Forward, Regex, QuotedString, Suppress,
        Optional, Group, infixNotation, opAssoc, ParserElement, pyparsing_common)

    # packrat memoization noticeably speeds up this recursive grammar
    ParserElement.enablePackrat()

    COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, ":[]{}~^")
    LPAR, RPAR = map(Suppress, "()")
    and_, or_, not_, to_ = map(CaselessKeyword, "AND OR NOT TO".split())
    # NOTE(review): `keyword` is built but never referenced below — confirm
    # whether it was meant to exclude keywords from `valid_word`
    keyword = and_ | or_ | not_ | to_

    expression = Forward()

    # a word may contain backslash-escaped specials; the parse action
    # unescapes them, using chr(127) as a temporary stand-in so escaped
    # backslashes survive the removal of single backslashes
    valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word")
    valid_word.setParseAction(
        lambda t: t[0]
        .replace('\\\\', chr(127))
        .replace('\\', '')
        .replace(chr(127), '\\')
    )

    string = QuotedString('"')

    required_modifier = Literal("+")("required")
    prohibit_modifier = Literal("-")("prohibit")
    integer = Regex(r"\d+").setParseAction(lambda t: int(t[0]))
    proximity_modifier = Group(TILDE + integer("proximity"))
    number = pyparsing_common.fnumber()
    fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

    term = Forward()
    field_name = valid_word().setName("fieldname")
    # [a TO b] includes the bounds, {a TO b} excludes them
    incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
    excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
    range_search = incl_range_search("incl_range") | excl_range_search("excl_range")
    boost = (CARAT + number("boost"))

    string_expr = Group(string + proximity_modifier) | string
    word_expr = Group(valid_word + fuzzy_modifier) | valid_word
    term << (Optional(field_name("field") + COLON) +
             (word_expr | string_expr | range_search | Group(
                 LPAR + expression + RPAR)) +
             Optional(boost))
    # keep a nested group only when there is extra structure to preserve
    term.setParseAction(lambda t: [t] if 'field' in t or 'boost' in t else None)

    # precedence, tightest first: +/- modifiers, NOT, AND, OR; the
    # Optional() on OR makes two adjacent terms an implicit OR
    expression << infixNotation(
        term,
        [
            (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT),
            ((not_ | '!').setParseAction(lambda: "NOT"), 1, opAssoc.RIGHT),
            ((and_ | '&&').setParseAction(lambda: "AND"), 2, opAssoc.LEFT),
            (Optional(or_ | '||').setParseAction(lambda: "OR"), 2, opAssoc.LEFT),
        ]
    )

    return expression
@@ -0,0 +1,100
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2010-2018 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
21 import copy
22 import mock
23 import pytest
24
25 from rhodecode.lib.index import search_utils
26
27
@pytest.mark.parametrize('test_text, expected_output', [
    ('some text', ['some', 'text']),
    # NOTE(review): exact duplicate of the row above — possibly meant to
    # vary whitespace or quoting; confirm intent
    ('some text', ['some', 'text']),
    ('some text "with a phrase"', ['some', 'text', 'with a phrase']),
    ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
    ('"justphrase"', ['justphrase']),
    ('""', []),
    ('', []),
    (' ', []),
    ('" "', []),
])
def test_extract_phrases(test_text, expected_output):
    # words and double-quoted phrases are split out; empty and
    # whitespace-only results are discarded
    assert search_utils.extract_phrases(test_text) == expected_output
41
42
@pytest.mark.parametrize('test_text, text_phrases, expected_output', [
    ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
    ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
    ('irrelevant', ['not found'], []),
    # NOTE(review): exact duplicate of the row above
    ('irrelevant', ['not found'], []),
])
def test_get_matching_phrase_offsets(test_text, text_phrases, expected_output):
    # offsets are (start, end) character positions of each phrase hit
    assert search_utils.get_matching_phrase_offsets(
        test_text, text_phrases) == expected_output
52
53
@pytest.mark.parametrize('test_text, text_phrases, expected_output', [
    ('__RCSearchHLMarkBEG__some__RCSearchHLMarkEND__ text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(0, 46), (52, 98)]),
    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ there', [], [(0, 46), (47, 93)]),
    ('some text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(10, 56)]),
    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__there__RCSearchHLMarkEND__', [], [(0, 46), (47, 93), (94, 141)]),
    ('irrelevant', ['not found'], []),
    ('irrelevant', ['not found'], []),
])
def test_get_matching_marker_offsets(test_text, text_phrases, expected_output):
    # NOTE(review): `text_phrases` is never passed to the function under
    # test — every case exercises only the default HL marker pattern, so
    # the ['not found'] rows test nothing phrase-specific; confirm intent
    assert search_utils.get_matching_markers_offsets(test_text) == expected_output
65
66
def test_normalize_text_for_matching():
    # NOTE(review): the expected literal has single spaces between groups,
    # but re.sub(r'[^\w]', ' ', ...) emits one space per punctuation
    # character — the whitespace here may have been collapsed in transit;
    # verify the literal against the actual normalizer output
    assert search_utils.normalize_text_for_matching(
        'OJjfe)*#$*@)$JF*)3r2f80h') == 'ojjfe jf 3r2f80h'
70
71
def test_get_matching_line_offsets():
    # six lines; 'text' appears on line 3 (offset 5) and line 6 (offset 0)
    words = '\n'.join([
        'words words words',
        'words words words',
        'some text some',
        'words words words',
        'words words words',
        'text here what'
    ])
    total_lines, matched_offsets = \
        search_utils.get_matching_line_offsets(words, terms='text')
    assert total_lines == 6
    assert matched_offsets == {3: [(5, 9)], 6: [(0, 4)]}
85
86
def test_get_matching_line_offsets_using_markers():
    # marker-based search: offsets span the whole marked region including
    # the __1__/__2__ markers themselves (14 chars for '__1__text__2__')
    words = '\n'.join([
        'words words words',
        'words words words',
        'some __1__text__2__ some',
        'words words words',
        'words words words',
        '__1__text__2__ here what'
    ])
    total_lines, matched_offsets = \
        search_utils.get_matching_line_offsets(words, terms=None,
                                               markers=['__1__(.*?)__2__'])
    assert total_lines == 6
    assert matched_offsets == {3: [(5, 19)], 6: [(0, 14)]}
@@ -407,30 +407,75 self: super: {
407 };
407 };
408 };
408 };
409 "elasticsearch" = super.buildPythonPackage {
409 "elasticsearch" = super.buildPythonPackage {
410 name = "elasticsearch-2.3.0";
410 name = "elasticsearch-6.3.1";
411 doCheck = false;
411 doCheck = false;
412 propagatedBuildInputs = [
412 propagatedBuildInputs = [
413 self."urllib3"
413 self."urllib3"
414 ];
414 ];
415 src = fetchurl {
415 src = fetchurl {
416 url = "https://files.pythonhosted.org/packages/10/35/5fd52c5f0b0ee405ed4b5195e8bce44c5e041787680dc7b94b8071cac600/elasticsearch-2.3.0.tar.gz";
416 url = "https://files.pythonhosted.org/packages/9d/ce/c4664e8380e379a9402ecfbaf158e56396da90d520daba21cfa840e0eb71/elasticsearch-6.3.1.tar.gz";
417 sha256 = "10ad2dk73xsys9vajwsncibs69asa63w1hgwz6lz1prjpyi80c5y";
417 sha256 = "12y93v0yn7a4xmf969239g8gb3l4cdkclfpbk1qc8hx5qkymrnma";
418 };
418 };
419 meta = {
419 meta = {
420 license = [ pkgs.lib.licenses.asl20 ];
420 license = [ pkgs.lib.licenses.asl20 ];
421 };
421 };
422 };
422 };
423 "elasticsearch-dsl" = super.buildPythonPackage {
423 "elasticsearch-dsl" = super.buildPythonPackage {
424 name = "elasticsearch-dsl-2.2.0";
424 name = "elasticsearch-dsl-6.3.1";
425 doCheck = false;
425 doCheck = false;
426 propagatedBuildInputs = [
426 propagatedBuildInputs = [
427 self."six"
427 self."six"
428 self."python-dateutil"
428 self."python-dateutil"
429 self."elasticsearch"
429 self."elasticsearch"
430 self."ipaddress"
431 ];
432 src = fetchurl {
433 url = "https://files.pythonhosted.org/packages/4c/0d/1549f50c591db6bb4e66cbcc8d34a6e537c3d89aa426b167c244fd46420a/elasticsearch-dsl-6.3.1.tar.gz";
434 sha256 = "1gh8a0shqi105k325hgwb9avrpdjh0mc6mxwfg9ba7g6lssb702z";
435 };
436 meta = {
437 license = [ pkgs.lib.licenses.asl20 ];
438 };
439 };
440 "elasticsearch1" = super.buildPythonPackage {
441 name = "elasticsearch1-1.10.0";
442 doCheck = false;
443 propagatedBuildInputs = [
444 self."urllib3"
430 ];
445 ];
431 src = fetchurl {
446 src = fetchurl {
432 url = "https://files.pythonhosted.org/packages/66/2f/52a086968788e58461641570f45c3207a52d46ebbe9b77dc22b6a8ffda66/elasticsearch-dsl-2.2.0.tar.gz";
447 url = "https://files.pythonhosted.org/packages/a6/eb/73e75f9681fa71e3157b8ee878534235d57f24ee64f0e77f8d995fb57076/elasticsearch1-1.10.0.tar.gz";
433 sha256 = "1g4kxzxsdwlsl2a9kscmx11pafgimhj7y8wrfksv8pgvpkfb9fwr";
448 sha256 = "0g89444kd5zwql4vbvyrmi2m6l6dcj6ga98j4hqxyyyz6z20aki2";
449 };
450 meta = {
451 license = [ pkgs.lib.licenses.asl20 ];
452 };
453 };
454 "elasticsearch1-dsl" = super.buildPythonPackage {
455 name = "elasticsearch1-dsl-0.0.12";
456 doCheck = false;
457 propagatedBuildInputs = [
458 self."six"
459 self."python-dateutil"
460 self."elasticsearch1"
461 ];
462 src = fetchurl {
463 url = "https://files.pythonhosted.org/packages/eb/9d/785342775cb10eddc9b8d7457d618a423b4f0b89d8b2b2d1bc27190d71db/elasticsearch1-dsl-0.0.12.tar.gz";
464 sha256 = "0ig1ly39v93hba0z975wnhbmzwj28w6w1sqlr2g7cn5spp732bhk";
465 };
466 meta = {
467 license = [ pkgs.lib.licenses.asl20 ];
468 };
469 };
470 "elasticsearch2" = super.buildPythonPackage {
471 name = "elasticsearch2-2.5.0";
472 doCheck = false;
473 propagatedBuildInputs = [
474 self."urllib3"
475 ];
476 src = fetchurl {
477 url = "https://files.pythonhosted.org/packages/84/77/63cf63d4ba11d913b5278406f2a37b0712bec6fc85edfb6151a33eaeba25/elasticsearch2-2.5.0.tar.gz";
478 sha256 = "0ky0q16lbvz022yv6q3pix7aamf026p1y994537ccjf0p0dxnbxr";
434 };
479 };
435 meta = {
480 meta = {
436 license = [ pkgs.lib.licenses.asl20 ];
481 license = [ pkgs.lib.licenses.asl20 ];
@@ -818,11 +863,11 self: super: {
818 };
863 };
819 };
864 };
820 "markupsafe" = super.buildPythonPackage {
865 "markupsafe" = super.buildPythonPackage {
821 name = "markupsafe-1.0";
866 name = "markupsafe-1.1.0";
822 doCheck = false;
867 doCheck = false;
823 src = fetchurl {
868 src = fetchurl {
824 url = "https://files.pythonhosted.org/packages/4d/de/32d741db316d8fdb7680822dd37001ef7a448255de9699ab4bfcbdf4172b/MarkupSafe-1.0.tar.gz";
869 url = "https://files.pythonhosted.org/packages/ac/7e/1b4c2e05809a4414ebce0892fe1e32c14ace86ca7d50c70f00979ca9b3a3/MarkupSafe-1.1.0.tar.gz";
825 sha256 = "0rdn1s8x9ni7ss8rfiacj7x1085lx8mh2zdwqslnw8xc3l4nkgm6";
870 sha256 = "1lxirjypbdd3l9jl4vliilhfnhy7c7f2vlldqg1b0i74khn375sf";
826 };
871 };
827 meta = {
872 meta = {
828 license = [ pkgs.lib.licenses.bsdOriginal ];
873 license = [ pkgs.lib.licenses.bsdOriginal ];
@@ -1271,11 +1316,11 self: super: {
1271 };
1316 };
1272 };
1317 };
1273 "pyparsing" = super.buildPythonPackage {
1318 "pyparsing" = super.buildPythonPackage {
1274 name = "pyparsing-1.5.7";
1319 name = "pyparsing-2.3.0";
1275 doCheck = false;
1320 doCheck = false;
1276 src = fetchurl {
1321 src = fetchurl {
1277 url = "https://files.pythonhosted.org/packages/6f/2c/47457771c02a8ff0f302b695e094ec309e30452232bd79198ee94fda689f/pyparsing-1.5.7.tar.gz";
1322 url = "https://files.pythonhosted.org/packages/d0/09/3e6a5eeb6e04467b737d55f8bba15247ac0876f98fae659e58cd744430c6/pyparsing-2.3.0.tar.gz";
1278 sha256 = "17z7ws076z977sclj628fvwrp8y9j2rvdjcsq42v129n1gwi8vk4";
1323 sha256 = "14k5v7n3xqw8kzf42x06bzp184spnlkya2dpjyflax6l3yrallzk";
1279 };
1324 };
1280 meta = {
1325 meta = {
1281 license = [ pkgs.lib.licenses.mit ];
1326 license = [ pkgs.lib.licenses.mit ];
@@ -1642,7 +1687,7 self: super: {
1642 };
1687 };
1643 };
1688 };
1644 "rhodecode-enterprise-ce" = super.buildPythonPackage {
1689 "rhodecode-enterprise-ce" = super.buildPythonPackage {
1645 name = "rhodecode-enterprise-ce-4.15.0";
1690 name = "rhodecode-enterprise-ce-4.16.0";
1646 buildInputs = [
1691 buildInputs = [
1647 self."pytest"
1692 self."pytest"
1648 self."py"
1693 self."py"
@@ -1788,7 +1833,7 self: super: {
1788 };
1833 };
1789 };
1834 };
1790 "rhodecode-tools" = super.buildPythonPackage {
1835 "rhodecode-tools" = super.buildPythonPackage {
1791 name = "rhodecode-tools-1.0.1";
1836 name = "rhodecode-tools-1.1.0";
1792 doCheck = false;
1837 doCheck = false;
1793 propagatedBuildInputs = [
1838 propagatedBuildInputs = [
1794 self."click"
1839 self."click"
@@ -1797,14 +1842,16 self: super: {
1797 self."mako"
1842 self."mako"
1798 self."markupsafe"
1843 self."markupsafe"
1799 self."requests"
1844 self."requests"
1800 self."elasticsearch"
1801 self."elasticsearch-dsl"
1802 self."urllib3"
1845 self."urllib3"
1803 self."whoosh"
1846 self."whoosh"
1847 self."elasticsearch"
1848 self."elasticsearch-dsl"
1849 self."elasticsearch2"
1850 self."elasticsearch1-dsl"
1804 ];
1851 ];
1805 src = fetchurl {
1852 src = fetchurl {
1806 url = "https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.0.1.tar.gz?md5=ffb5d6bcb855305b93cfe23ad42e500b";
1853 url = "https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.1.0.tar.gz?md5=cc320c277cb2add546220290ac9be626";
1807 sha256 = "0nr300s4sg685qs4wgbwlplwriawrwi6jq79z37frcnpyc89gpvm";
1854 sha256 = "1wbnnfrzyp0d4ys55vj5vnfrzfhwlqgdhc8yv8i6kwinizf8hfrn";
1808 };
1855 };
1809 meta = {
1856 meta = {
1810 license = [ { fullName = "Apache 2.0 and Proprietary"; } ];
1857 license = [ { fullName = "Apache 2.0 and Proprietary"; } ];
@@ -1848,11 +1895,11 self: super: {
1848 };
1895 };
1849 };
1896 };
1850 "setuptools" = super.buildPythonPackage {
1897 "setuptools" = super.buildPythonPackage {
1851 name = "setuptools-40.6.2";
1898 name = "setuptools-40.6.3";
1852 doCheck = false;
1899 doCheck = false;
1853 src = fetchurl {
1900 src = fetchurl {
1854 url = "https://files.pythonhosted.org/packages/b0/d1/8acb42f391cba52e35b131e442e80deffbb8d0676b93261d761b1f0ef8fb/setuptools-40.6.2.zip";
1901 url = "https://files.pythonhosted.org/packages/37/1b/b25507861991beeade31473868463dad0e58b1978c209de27384ae541b0b/setuptools-40.6.3.zip";
1855 sha256 = "0r2c5hapirlzm34h7pl1lgkm6gk7bcrlrdj28qgsvaqg3f74vfw6";
1902 sha256 = "1y085dnk574sxw9aymdng9gijvrsbw86hsv9hqnhv7y4d6nlsirv";
1856 };
1903 };
1857 meta = {
1904 meta = {
1858 license = [ pkgs.lib.licenses.mit ];
1905 license = [ pkgs.lib.licenses.mit ];
@@ -2043,11 +2090,11 self: super: {
2043 };
2090 };
2044 };
2091 };
2045 "urllib3" = super.buildPythonPackage {
2092 "urllib3" = super.buildPythonPackage {
2046 name = "urllib3-1.21";
2093 name = "urllib3-1.24.1";
2047 doCheck = false;
2094 doCheck = false;
2048 src = fetchurl {
2095 src = fetchurl {
2049 url = "https://files.pythonhosted.org/packages/34/95/7b28259d0006ed681c424cd71a668363265eac92b67dddd018eb9a22bff8/urllib3-1.21.tar.gz";
2096 url = "https://files.pythonhosted.org/packages/b1/53/37d82ab391393565f2f831b8eedbffd57db5a718216f82f1a8b4d381a1c1/urllib3-1.24.1.tar.gz";
2050 sha256 = "0irnj4wvh2y36s4q3l2vas9qr9m766w6w418nb490j3mf8a8zw6h";
2097 sha256 = "08lwd9f3hqznyf32vnzwvp87pchx062nkbgyrf67rwlkgj0jk5fy";
2051 };
2098 };
2052 meta = {
2099 meta = {
2053 license = [ pkgs.lib.licenses.mit ];
2100 license = [ pkgs.lib.licenses.mit ];
@@ -36,7 +36,7 kombu==4.2.0
36 lxml==4.2.5
36 lxml==4.2.5
37 mako==1.0.7
37 mako==1.0.7
38 markdown==2.6.11
38 markdown==2.6.11
39 markupsafe==1.0.0
39 markupsafe==1.1.0
40 msgpack-python==0.5.6
40 msgpack-python==0.5.6
41 pyotp==2.2.7
41 pyotp==2.2.7
42 packaging==15.2
42 packaging==15.2
@@ -51,7 +51,7 pycrypto==2.6.1
51 pycurl==7.43.0.2
51 pycurl==7.43.0.2
52 pyflakes==0.8.1
52 pyflakes==0.8.1
53 pygments==2.3.0
53 pygments==2.3.0
54 pyparsing==1.5.7
54 pyparsing==2.3.0
55 pyramid-beaker==0.8
55 pyramid-beaker==0.8
56 pyramid-debugtoolbar==4.4.0
56 pyramid-debugtoolbar==4.4.0
57 pyramid-jinja2==2.7
57 pyramid-jinja2==2.7
@@ -79,7 +79,7 subprocess32==3.5.2
79 supervisor==3.3.4
79 supervisor==3.3.4
80 tempita==0.5.2
80 tempita==0.5.2
81 translationstring==1.3
81 translationstring==1.3
82 urllib3==1.21
82 urllib3==1.24.1
83 urlobject==2.4.3
83 urlobject==2.4.3
84 venusian==1.1.0
84 venusian==1.1.0
85 weberror==0.10.3
85 weberror==0.10.3
@@ -123,7 +123,7 ipdb==0.11.0
123 ipython==5.1.0
123 ipython==5.1.0
124
124
125 ## rhodecode-tools, special case
125 ## rhodecode-tools, special case
126 https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.0.1.tar.gz?md5=ffb5d6bcb855305b93cfe23ad42e500b#egg=rhodecode-tools==1.0.1
126 https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.1.0.tar.gz?md5=cc320c277cb2add546220290ac9be626#egg=rhodecode-tools==1.1.0
127
127
128 ## appenlight
128 ## appenlight
129 appenlight-client==0.6.26
129 appenlight-client==0.6.26
@@ -666,8 +666,8 class AdminSettingsView(BaseAppView):
666 c = self.load_default_context()
666 c = self.load_default_context()
667 c.active = 'search'
667 c.active = 'search'
668
668
669 searcher = searcher_from_config(self.request.registry.settings)
669 c.searcher = searcher_from_config(self.request.registry.settings)
670 c.statistics = searcher.statistics(self.request.translate)
670 c.statistics = c.searcher.statistics(self.request.translate)
671
671
672 return self._get_template_context(c)
672 return self._get_template_context(c)
673
673
@@ -246,9 +246,9 class HomeView(BaseAppView):
246 }
246 }
247 for obj in acl_iter]
247 for obj in acl_iter]
248
248
249 def _get_hash_commit_list(self, auth_user, query):
249 def _get_hash_commit_list(self, auth_user, searcher, query):
250 org_query = query
250 org_query = query
251 if not query or len(query) < 3:
251 if not query or len(query) < 3 or not searcher:
252 return []
252 return []
253
253
254 commit_hashes = re.compile('(?:commit:)([0-9a-f]{2,40})').findall(query)
254 commit_hashes = re.compile('(?:commit:)([0-9a-f]{2,40})').findall(query)
@@ -257,9 +257,8 class HomeView(BaseAppView):
257 return []
257 return []
258 commit_hash = commit_hashes[0]
258 commit_hash = commit_hashes[0]
259
259
260 searcher = searcher_from_config(self.request.registry.settings)
261 result = searcher.search(
260 result = searcher.search(
262 'commit_id:%s*' % commit_hash, 'commit', auth_user,
261 'commit_id:{}*'.format(commit_hash), 'commit', auth_user,
263 raise_on_exc=False)
262 raise_on_exc=False)
264
263
265 return [
264 return [
@@ -303,6 +302,84 class HomeView(BaseAppView):
303 }
302 }
304 return data
303 return data
305
304
305 def _get_default_search_queries(self, search_context, searcher, query):
306 if not searcher:
307 return []
308 is_es_6 = searcher.is_es_6
309
310 queries = []
311 repo_group_name, repo_name, repo_context = None, None, None
312
313 # repo group context
314 if search_context.get('search_context[repo_group_name]'):
315 repo_group_name = search_context.get('search_context[repo_group_name]')
316 if search_context.get('search_context[repo_name]'):
317 repo_name = search_context.get('search_context[repo_name]')
318 repo_context = search_context.get('search_context[repo_view_type]')
319
320 if is_es_6 and repo_name:
321 def query_modifier():
322 qry = '{} repo_name.raw:{} '.format(
323 query, searcher.escape_specials(repo_name))
324 return {'q': qry, 'type': 'content'}
325 label = u'Search for `{}` through files in this repository.'.format(query)
326 queries.append(
327 {
328 'id': -10,
329 'value': query,
330 'value_display': label,
331 'type': 'search',
332 'url': h.route_path(
333 'search_repo', repo_name=repo_name, _query=query_modifier())
334 }
335 )
336
337 def query_modifier():
338 qry = '{} repo_name.raw:{} '.format(
339 query, searcher.escape_specials(repo_name))
340 return {'q': qry, 'type': 'commit'}
341 label = u'Search for `{}` through commits in this repository.'.format(query)
342 queries.append(
343 {
344 'id': -10,
345 'value': query,
346 'value_display': label,
347 'type': 'search',
348 'url': h.route_path(
349 'search_repo', repo_name=repo_name, _query=query_modifier())
350 }
351 )
352
353 elif is_es_6 and repo_group_name:
354 def query_modifier():
355 qry = '{} repo_name.raw:{} '.format(
356 query, searcher.escape_specials(repo_group_name + '/*'))
357 return {'q': qry, 'type': 'content'}
358 label = u'Search for `{}` through files in this repository group'.format(query)
359 queries.append(
360 {
361 'id': -20,
362 'value': query,
363 'value_display': label,
364 'type': 'search',
365 'url': h.route_path('search', _query=query_modifier())
366 }
367 )
368
369 if not queries:
370 queries.append(
371 {
372 'id': -1,
373 'value': query,
374 'value_display': u'Search for: `{}`'.format(query),
375 'type': 'search',
376 'url': h.route_path('search',
377 _query={'q': query, 'type': 'content'})
378 }
379 )
380
381 return queries
382
306 @LoginRequired()
383 @LoginRequired()
307 @view_config(
384 @view_config(
308 route_name='goto_switcher_data', request_method='GET',
385 route_name='goto_switcher_data', request_method='GET',
@@ -315,26 +392,21 class HomeView(BaseAppView):
315 query = self.request.GET.get('query')
392 query = self.request.GET.get('query')
316 log.debug('generating main filter data, query %s', query)
393 log.debug('generating main filter data, query %s', query)
317
394
318 default_search_val = u'Full text search for: `{}`'.format(query)
319 res = []
395 res = []
320 if not query:
396 if not query:
321 return {'suggestions': res}
397 return {'suggestions': res}
322
398
323 res.append({
399 searcher = searcher_from_config(self.request.registry.settings)
324 'id': -1,
400 for _q in self._get_default_search_queries(self.request.GET, searcher, query):
325 'value': query,
401 res.append(_q)
326 'value_display': default_search_val,
402
327 'type': 'search',
403 repo_group_id = safe_int(self.request.GET.get('search_context[repo_group_id]'))
328 'url': h.route_path(
329 'search', _query={'q': query})
330 })
331 repo_group_id = safe_int(self.request.GET.get('repo_group_id'))
332 if repo_group_id:
404 if repo_group_id:
333 repo_group = RepoGroup.get(repo_group_id)
405 repo_group = RepoGroup.get(repo_group_id)
334 composed_hint = '{}/{}'.format(repo_group.group_name, query)
406 composed_hint = '{}/{}'.format(repo_group.group_name, query)
335 show_hint = not query.startswith(repo_group.group_name)
407 show_hint = not query.startswith(repo_group.group_name)
336 if repo_group and show_hint:
408 if repo_group and show_hint:
337 hint = u'Group search: `{}`'.format(composed_hint)
409 hint = u'Repository search inside: `{}`'.format(composed_hint)
338 res.append({
410 res.append({
339 'id': -1,
411 'id': -1,
340 'value': composed_hint,
412 'value': composed_hint,
@@ -351,7 +423,7 class HomeView(BaseAppView):
351 for serialized_repo in repos:
423 for serialized_repo in repos:
352 res.append(serialized_repo)
424 res.append(serialized_repo)
353
425
354 # TODO(marcink): permissions for that ?
426 # TODO(marcink): should all logged in users be allowed to search others?
355 allowed_user_search = self._rhodecode_user.username != User.DEFAULT_USER
427 allowed_user_search = self._rhodecode_user.username != User.DEFAULT_USER
356 if allowed_user_search:
428 if allowed_user_search:
357 users = self._get_user_list(query)
429 users = self._get_user_list(query)
@@ -362,7 +434,7 class HomeView(BaseAppView):
362 for serialized_user_group in user_groups:
434 for serialized_user_group in user_groups:
363 res.append(serialized_user_group)
435 res.append(serialized_user_group)
364
436
365 commits = self._get_hash_commit_list(c.auth_user, query)
437 commits = self._get_hash_commit_list(c.auth_user, searcher, query)
366 if commits:
438 if commits:
367 unique_repos = collections.OrderedDict()
439 unique_repos = collections.OrderedDict()
368 for commit in commits:
440 for commit in commits:
@@ -45,11 +45,14 def search(request, tmpl_context, repo_n
45 errors = []
45 errors = []
46 try:
46 try:
47 search_params = schema.deserialize(
47 search_params = schema.deserialize(
48 dict(search_query=request.GET.get('q'),
48 dict(
49 search_type=request.GET.get('type'),
49 search_query=request.GET.get('q'),
50 search_sort=request.GET.get('sort'),
50 search_type=request.GET.get('type'),
51 page_limit=request.GET.get('page_limit'),
51 search_sort=request.GET.get('sort'),