##// END OF EJS Templates
search: add support for elastic search 6...
dan -
r3319:b8fd1d7a default
parent child Browse files
Show More
@@ -0,0 +1,257 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2012-2018 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 import re
21
22 import pygments.filter
23 import pygments.filters
24 from pygments.token import Comment
25
26 HL_BEG_MARKER = '__RCSearchHLMarkBEG__'
27 HL_END_MARKER = '__RCSearchHLMarkEND__'
28 HL_MARKER_RE = '{}(.*?){}'.format(HL_BEG_MARKER, HL_END_MARKER)
29
30
class ElasticSearchHLFilter(pygments.filters.Filter):
    """
    Pygments stream filter that turns RhodeCode search highlight markers
    embedded in the token stream into ``Comment.ElasticMatch`` tokens.

    Text between ``HL_BEG_MARKER`` and ``HL_END_MARKER`` is re-emitted with
    the ``Comment.ElasticMatch`` token type; the markers themselves are
    dropped. Highlight state carries across token boundaries, so a match
    spanning multiple tokens is handled correctly.
    """
    _names = [HL_BEG_MARKER, HL_END_MARKER]

    def __init__(self, **options):
        pygments.filters.Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        beg_marker, end_marker = self._names
        split_pattern = '({}|{})'.format(beg_marker, end_marker)

        inside_match = False
        for ttype, value in stream:
            if beg_marker in value or end_marker in value:
                # token text contains marker(s): split it apart and toggle
                # the highlight state whenever a marker is encountered
                for piece in re.split(split_pattern, value):
                    if not piece:
                        continue
                    if piece == beg_marker:
                        inside_match = True
                    elif piece == end_marker:
                        inside_match = False
                    else:
                        yield (Comment.ElasticMatch if inside_match
                               else ttype), piece
            else:
                # no markers here; just re-emit, highlighted if we are
                # currently between a begin and end marker
                yield (Comment.ElasticMatch if inside_match
                       else ttype), value
66
67
def extract_phrases(text_query):
    """
    Extracts phrases from search term string making sure phrases
    contained in double quotes are kept together - and discarding empty values
    or fully whitespace values eg.

    'some text "a phrase" more' => ['some', 'text', 'a phrase', 'more']

    """
    # Splitting on the quote character makes segments alternate between
    # unquoted text (even indices) and quoted phrases (odd indices). An
    # unterminated trailing quote leaves the remainder as a single phrase,
    # which matches the behaviour of a character-by-character scan.
    collected = []
    for idx, segment in enumerate(text_query.split('"')):
        if idx % 2:
            # inside double quotes: keep as one phrase
            collected.append(segment)
        else:
            # outside quotes: split on single space characters only
            collected.extend(segment.split(' '))

    # drop empty / whitespace-only entries and trim the rest
    return [phrase.strip() for phrase in collected if phrase.strip()]
107
108
def get_matching_phrase_offsets(text, phrases):
    """
    Returns a list of string offsets in `text` that the list of `terms` match

    >>> get_matching_phrase_offsets('some text here', ['some', 'here'])
    [(0, 4), (10, 14)]

    """
    # NOTE(review): each phrase is fed to re.finditer as a regex pattern;
    # callers appear to pass normalized alphanumeric terms, so regex
    # metacharacters should not occur — verify before passing raw input.
    matches = []
    for phrase in (phrases or []):
        matches.extend(
            (found.start(), found.end())
            for found in re.finditer(phrase, text))
    return matches
125
126
def get_matching_markers_offsets(text, markers=None):
    """
    Returns a list of (start, end) string offsets in `text` covering each
    span delimited by matching begin/end markers.

    :param text: string to scan
    :param markers: list of regex patterns describing marker pairs; when
        not given, defaults to the ElasticSearch highlight marker pattern
        (``HL_MARKER_RE``)

    >>> get_matching_markers_offsets('$1some$2 text $1here$2 marked', [r'\$1(.*?)\$2'])
    [(0, 8), (14, 22)]

    """
    # `markers or [...]` always yields a non-empty list, so the previous
    # `if markers:` guard around the loop was dead code and is removed.
    markers = markers or [HL_MARKER_RE]
    offsets = []

    for mark in markers:
        for match in re.finditer(mark, text):
            offsets.append((match.start(), match.end()))

    return offsets
144
145
def normalize_text_for_matching(x):
    """
    Lower-cases `x` and replaces every non-alphanumeric character with a
    space, useful for comparing two text strings without punctuation.
    """
    lowered = x.lower()
    return re.sub(r'[^\w]', ' ', lowered)
152
153
def get_matching_line_offsets(lines, terms=None, markers=None):
    """
    Return a 2-tuple ``(total_lines, matching_lines)`` for a text search.

    :param lines: text to scan; despite the name this is a single string —
        it is split with ``.splitlines()`` below
    :param terms: search term string to match in lines eg. 'some text'
    :param markers: instead of terms, use highlight marker regexes that
        mark beginning and end for a matched item. eg. ['START(.*?)END']
    :return: ``(number of lines scanned, {line_number: [(start, end), ...]})``
        with line numbers starting from 1

    eg.

    text = '''
    words words words
    words words words
    some text some
    words words words
    words words words
    text here what
    '''
    get_matching_line_offsets(text, 'text')
    6, {3: [(5, 9)], 6: [(0, 4)]}

    """
    matching_lines = {}
    # keeps the last enumerated line number; 0 when the input is empty
    line_index = 0

    if terms:
        # free-text mode: normalize both terms and each line so matching
        # ignores case and punctuation
        phrases = [normalize_text_for_matching(phrase)
                   for phrase in extract_phrases(terms)]

        for line_index, line in enumerate(lines.splitlines(), start=1):
            normalized_line = normalize_text_for_matching(line)
            match_offsets = get_matching_phrase_offsets(normalized_line, phrases)
            if match_offsets:
                matching_lines[line_index] = match_offsets

    else:
        # marker mode: find spans delimited by highlight markers
        # (eg. ElasticSearch highlight output)
        markers = markers or [HL_MARKER_RE]
        for line_index, line in enumerate(lines.splitlines(), start=1):
            match_offsets = get_matching_markers_offsets(line, markers=markers)
            if match_offsets:
                matching_lines[line_index] = match_offsets

    return line_index, matching_lines
198
199
def lucene_query_parser():
    """
    Build and return a pyparsing grammar (the top-level ``expression``
    element) for a subset of the Lucene query language.

    Adapted from the pyparsing ``lucene_grammar`` example. Supports field
    queries (``field:value``), quoted phrases, fuzzy (``~0.5``) and
    proximity (``"..."~2``) modifiers, boosts (``^2``), inclusive/exclusive
    range searches and AND / OR / NOT operators (plus their ``&&`` / ``||``
    / ``!`` aliases). pyparsing is imported lazily so the module does not
    require it unless this parser is actually used.
    """
    # from pyparsing lucene_grammar
    from pyparsing import (
        Literal, CaselessKeyword, Forward, Regex, QuotedString, Suppress,
        Optional, Group, infixNotation, opAssoc, ParserElement, pyparsing_common)

    # packrat memoization speeds up the recursive grammar considerably
    ParserElement.enablePackrat()

    COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, ":[]{}~^")
    LPAR, RPAR = map(Suppress, "()")
    and_, or_, not_, to_ = map(CaselessKeyword, "AND OR NOT TO".split())
    keyword = and_ | or_ | not_ | to_

    expression = Forward()

    # a bare word; backslash escapes are unwrapped via the chr(127)
    # placeholder trick so '\\\\' survives the single-backslash strip
    valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word")
    valid_word.setParseAction(
        lambda t: t[0]
        .replace('\\\\', chr(127))
        .replace('\\', '')
        .replace(chr(127), '\\')
    )

    string = QuotedString('"')

    required_modifier = Literal("+")("required")
    prohibit_modifier = Literal("-")("prohibit")
    integer = Regex(r"\d+").setParseAction(lambda t: int(t[0]))
    proximity_modifier = Group(TILDE + integer("proximity"))
    number = pyparsing_common.fnumber()
    fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

    term = Forward()
    field_name = valid_word().setName("fieldname")
    incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
    excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
    range_search = incl_range_search("incl_range") | excl_range_search("excl_range")
    boost = (CARAT + number("boost"))

    string_expr = Group(string + proximity_modifier) | string
    word_expr = Group(valid_word + fuzzy_modifier) | valid_word
    term << (Optional(field_name("field") + COLON) +
             (word_expr | string_expr | range_search | Group(
                 LPAR + expression + RPAR)) +
             Optional(boost))
    term.setParseAction(lambda t: [t] if 'field' in t or 'boost' in t else None)

    # operator precedence: +/- bind tightest, then NOT, AND, OR;
    # adjacency with no operator defaults to OR
    expression << infixNotation(
        term,
        [
            (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT),
            ((not_ | '!').setParseAction(lambda: "NOT"), 1, opAssoc.RIGHT),
            ((and_ | '&&').setParseAction(lambda: "AND"), 2, opAssoc.LEFT),
            (Optional(or_ | '||').setParseAction(lambda: "OR"), 2, opAssoc.LEFT),
        ]
    )

    return expression
@@ -0,0 +1,100 b''
1 # -*- coding: utf-8 -*-
2
3 # Copyright (C) 2010-2018 RhodeCode GmbH
4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
21 import copy
22 import mock
23 import pytest
24
25 from rhodecode.lib.index import search_utils
26
27
@pytest.mark.parametrize('test_text, expected_output', [
    ('some text', ['some', 'text']),
    ('some text', ['some', 'text']),
    ('some text "with a phrase"', ['some', 'text', 'with a phrase']),
    ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
    ('"justphrase"', ['justphrase']),
    ('""', []),
    ('', []),
    (' ', []),
    ('" "', []),
])
def test_extract_phrases(test_text, expected_output):
    # quoted phrases must stay intact; empty and whitespace-only
    # values are discarded
    assert search_utils.extract_phrases(test_text) == expected_output
41
42
@pytest.mark.parametrize('test_text, text_phrases, expected_output', [
    ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
    ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
    ('irrelevant', ['not found'], []),
    ('irrelevant', ['not found'], []),
])
def test_get_matching_phrase_offsets(test_text, text_phrases, expected_output):
    # offsets are (start, end) pairs for every occurrence of every phrase
    assert search_utils.get_matching_phrase_offsets(
        test_text, text_phrases) == expected_output
52
53
@pytest.mark.parametrize('test_text, text_phrases, expected_output', [
    ('__RCSearchHLMarkBEG__some__RCSearchHLMarkEND__ text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(0, 46), (52, 98)]),
    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ there', [], [(0, 46), (47, 93)]),
    ('some text __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__', [], [(10, 56)]),
    ('__RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__here__RCSearchHLMarkEND__ __RCSearchHLMarkBEG__there__RCSearchHLMarkEND__', [], [(0, 46), (47, 93), (94, 141)]),
    ('irrelevant', ['not found'], []),
    ('irrelevant', ['not found'], []),
])
def test_get_matching_marker_offsets(test_text, text_phrases, expected_output):
    # NOTE(review): `text_phrases` is parametrized but never passed to the
    # function — the default HL marker pattern is always used; presumably
    # intentional copy of the sibling test's table, verify.
    assert search_utils.get_matching_markers_offsets(test_text) == expected_output
65
66
def test_normalize_text_for_matching():
    # each non-alphanumeric character maps to one space and case is folded
    # NOTE(review): the expected literal may have lost repeated spaces in
    # transit — punctuation runs should produce equal-length space runs;
    # verify against a live run.
    assert search_utils.normalize_text_for_matching(
        'OJjfe)*#$*@)$JF*)3r2f80h') == 'ojjfe jf 3r2f80h'
70
71
def test_get_matching_line_offsets():
    # term-based search: returns (total line count, {1-based line: offsets})
    words = '\n'.join([
        'words words words',
        'words words words',
        'some text some',
        'words words words',
        'words words words',
        'text here what'
    ])
    total_lines, matched_offsets = \
        search_utils.get_matching_line_offsets(words, terms='text')
    assert total_lines == 6
    assert matched_offsets == {3: [(5, 9)], 6: [(0, 4)]}
85
86
def test_get_matching_line_offsets_using_markers():
    # marker-based search: offsets cover the whole marked span,
    # markers included
    words = '\n'.join([
        'words words words',
        'words words words',
        'some __1__text__2__ some',
        'words words words',
        'words words words',
        '__1__text__2__ here what'
    ])
    total_lines, matched_offsets = \
        search_utils.get_matching_line_offsets(words, terms=None,
                                               markers=['__1__(.*?)__2__'])
    assert total_lines == 6
    assert matched_offsets == {3: [(5, 19)], 6: [(0, 14)]}
@@ -407,30 +407,75 b' self: super: {'
407 407 };
408 408 };
409 409 "elasticsearch" = super.buildPythonPackage {
410 name = "elasticsearch-2.3.0";
410 name = "elasticsearch-6.3.1";
411 411 doCheck = false;
412 412 propagatedBuildInputs = [
413 413 self."urllib3"
414 414 ];
415 415 src = fetchurl {
416 url = "https://files.pythonhosted.org/packages/10/35/5fd52c5f0b0ee405ed4b5195e8bce44c5e041787680dc7b94b8071cac600/elasticsearch-2.3.0.tar.gz";
417 sha256 = "10ad2dk73xsys9vajwsncibs69asa63w1hgwz6lz1prjpyi80c5y";
416 url = "https://files.pythonhosted.org/packages/9d/ce/c4664e8380e379a9402ecfbaf158e56396da90d520daba21cfa840e0eb71/elasticsearch-6.3.1.tar.gz";
417 sha256 = "12y93v0yn7a4xmf969239g8gb3l4cdkclfpbk1qc8hx5qkymrnma";
418 418 };
419 419 meta = {
420 420 license = [ pkgs.lib.licenses.asl20 ];
421 421 };
422 422 };
423 423 "elasticsearch-dsl" = super.buildPythonPackage {
424 name = "elasticsearch-dsl-2.2.0";
424 name = "elasticsearch-dsl-6.3.1";
425 425 doCheck = false;
426 426 propagatedBuildInputs = [
427 427 self."six"
428 428 self."python-dateutil"
429 429 self."elasticsearch"
430 self."ipaddress"
431 ];
432 src = fetchurl {
433 url = "https://files.pythonhosted.org/packages/4c/0d/1549f50c591db6bb4e66cbcc8d34a6e537c3d89aa426b167c244fd46420a/elasticsearch-dsl-6.3.1.tar.gz";
434 sha256 = "1gh8a0shqi105k325hgwb9avrpdjh0mc6mxwfg9ba7g6lssb702z";
435 };
436 meta = {
437 license = [ pkgs.lib.licenses.asl20 ];
438 };
439 };
440 "elasticsearch1" = super.buildPythonPackage {
441 name = "elasticsearch1-1.10.0";
442 doCheck = false;
443 propagatedBuildInputs = [
444 self."urllib3"
430 445 ];
431 446 src = fetchurl {
432 url = "https://files.pythonhosted.org/packages/66/2f/52a086968788e58461641570f45c3207a52d46ebbe9b77dc22b6a8ffda66/elasticsearch-dsl-2.2.0.tar.gz";
433 sha256 = "1g4kxzxsdwlsl2a9kscmx11pafgimhj7y8wrfksv8pgvpkfb9fwr";
447 url = "https://files.pythonhosted.org/packages/a6/eb/73e75f9681fa71e3157b8ee878534235d57f24ee64f0e77f8d995fb57076/elasticsearch1-1.10.0.tar.gz";
448 sha256 = "0g89444kd5zwql4vbvyrmi2m6l6dcj6ga98j4hqxyyyz6z20aki2";
449 };
450 meta = {
451 license = [ pkgs.lib.licenses.asl20 ];
452 };
453 };
454 "elasticsearch1-dsl" = super.buildPythonPackage {
455 name = "elasticsearch1-dsl-0.0.12";
456 doCheck = false;
457 propagatedBuildInputs = [
458 self."six"
459 self."python-dateutil"
460 self."elasticsearch1"
461 ];
462 src = fetchurl {
463 url = "https://files.pythonhosted.org/packages/eb/9d/785342775cb10eddc9b8d7457d618a423b4f0b89d8b2b2d1bc27190d71db/elasticsearch1-dsl-0.0.12.tar.gz";
464 sha256 = "0ig1ly39v93hba0z975wnhbmzwj28w6w1sqlr2g7cn5spp732bhk";
465 };
466 meta = {
467 license = [ pkgs.lib.licenses.asl20 ];
468 };
469 };
470 "elasticsearch2" = super.buildPythonPackage {
471 name = "elasticsearch2-2.5.0";
472 doCheck = false;
473 propagatedBuildInputs = [
474 self."urllib3"
475 ];
476 src = fetchurl {
477 url = "https://files.pythonhosted.org/packages/84/77/63cf63d4ba11d913b5278406f2a37b0712bec6fc85edfb6151a33eaeba25/elasticsearch2-2.5.0.tar.gz";
478 sha256 = "0ky0q16lbvz022yv6q3pix7aamf026p1y994537ccjf0p0dxnbxr";
434 479 };
435 480 meta = {
436 481 license = [ pkgs.lib.licenses.asl20 ];
@@ -818,11 +863,11 b' self: super: {'
818 863 };
819 864 };
820 865 "markupsafe" = super.buildPythonPackage {
821 name = "markupsafe-1.0";
866 name = "markupsafe-1.1.0";
822 867 doCheck = false;
823 868 src = fetchurl {
824 url = "https://files.pythonhosted.org/packages/4d/de/32d741db316d8fdb7680822dd37001ef7a448255de9699ab4bfcbdf4172b/MarkupSafe-1.0.tar.gz";
825 sha256 = "0rdn1s8x9ni7ss8rfiacj7x1085lx8mh2zdwqslnw8xc3l4nkgm6";
869 url = "https://files.pythonhosted.org/packages/ac/7e/1b4c2e05809a4414ebce0892fe1e32c14ace86ca7d50c70f00979ca9b3a3/MarkupSafe-1.1.0.tar.gz";
870 sha256 = "1lxirjypbdd3l9jl4vliilhfnhy7c7f2vlldqg1b0i74khn375sf";
826 871 };
827 872 meta = {
828 873 license = [ pkgs.lib.licenses.bsdOriginal ];
@@ -1271,11 +1316,11 b' self: super: {'
1271 1316 };
1272 1317 };
1273 1318 "pyparsing" = super.buildPythonPackage {
1274 name = "pyparsing-1.5.7";
1319 name = "pyparsing-2.3.0";
1275 1320 doCheck = false;
1276 1321 src = fetchurl {
1277 url = "https://files.pythonhosted.org/packages/6f/2c/47457771c02a8ff0f302b695e094ec309e30452232bd79198ee94fda689f/pyparsing-1.5.7.tar.gz";
1278 sha256 = "17z7ws076z977sclj628fvwrp8y9j2rvdjcsq42v129n1gwi8vk4";
1322 url = "https://files.pythonhosted.org/packages/d0/09/3e6a5eeb6e04467b737d55f8bba15247ac0876f98fae659e58cd744430c6/pyparsing-2.3.0.tar.gz";
1323 sha256 = "14k5v7n3xqw8kzf42x06bzp184spnlkya2dpjyflax6l3yrallzk";
1279 1324 };
1280 1325 meta = {
1281 1326 license = [ pkgs.lib.licenses.mit ];
@@ -1642,7 +1687,7 b' self: super: {'
1642 1687 };
1643 1688 };
1644 1689 "rhodecode-enterprise-ce" = super.buildPythonPackage {
1645 name = "rhodecode-enterprise-ce-4.15.0";
1690 name = "rhodecode-enterprise-ce-4.16.0";
1646 1691 buildInputs = [
1647 1692 self."pytest"
1648 1693 self."py"
@@ -1788,7 +1833,7 b' self: super: {'
1788 1833 };
1789 1834 };
1790 1835 "rhodecode-tools" = super.buildPythonPackage {
1791 name = "rhodecode-tools-1.0.1";
1836 name = "rhodecode-tools-1.1.0";
1792 1837 doCheck = false;
1793 1838 propagatedBuildInputs = [
1794 1839 self."click"
@@ -1797,14 +1842,16 b' self: super: {'
1797 1842 self."mako"
1798 1843 self."markupsafe"
1799 1844 self."requests"
1800 self."elasticsearch"
1801 self."elasticsearch-dsl"
1802 1845 self."urllib3"
1803 1846 self."whoosh"
1847 self."elasticsearch"
1848 self."elasticsearch-dsl"
1849 self."elasticsearch2"
1850 self."elasticsearch1-dsl"
1804 1851 ];
1805 1852 src = fetchurl {
1806 url = "https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.0.1.tar.gz?md5=ffb5d6bcb855305b93cfe23ad42e500b";
1807 sha256 = "0nr300s4sg685qs4wgbwlplwriawrwi6jq79z37frcnpyc89gpvm";
1853 url = "https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.1.0.tar.gz?md5=cc320c277cb2add546220290ac9be626";
1854 sha256 = "1wbnnfrzyp0d4ys55vj5vnfrzfhwlqgdhc8yv8i6kwinizf8hfrn";
1808 1855 };
1809 1856 meta = {
1810 1857 license = [ { fullName = "Apache 2.0 and Proprietary"; } ];
@@ -1848,11 +1895,11 b' self: super: {'
1848 1895 };
1849 1896 };
1850 1897 "setuptools" = super.buildPythonPackage {
1851 name = "setuptools-40.6.2";
1898 name = "setuptools-40.6.3";
1852 1899 doCheck = false;
1853 1900 src = fetchurl {
1854 url = "https://files.pythonhosted.org/packages/b0/d1/8acb42f391cba52e35b131e442e80deffbb8d0676b93261d761b1f0ef8fb/setuptools-40.6.2.zip";
1855 sha256 = "0r2c5hapirlzm34h7pl1lgkm6gk7bcrlrdj28qgsvaqg3f74vfw6";
1901 url = "https://files.pythonhosted.org/packages/37/1b/b25507861991beeade31473868463dad0e58b1978c209de27384ae541b0b/setuptools-40.6.3.zip";
1902 sha256 = "1y085dnk574sxw9aymdng9gijvrsbw86hsv9hqnhv7y4d6nlsirv";
1856 1903 };
1857 1904 meta = {
1858 1905 license = [ pkgs.lib.licenses.mit ];
@@ -2043,11 +2090,11 b' self: super: {'
2043 2090 };
2044 2091 };
2045 2092 "urllib3" = super.buildPythonPackage {
2046 name = "urllib3-1.21";
2093 name = "urllib3-1.24.1";
2047 2094 doCheck = false;
2048 2095 src = fetchurl {
2049 url = "https://files.pythonhosted.org/packages/34/95/7b28259d0006ed681c424cd71a668363265eac92b67dddd018eb9a22bff8/urllib3-1.21.tar.gz";
2050 sha256 = "0irnj4wvh2y36s4q3l2vas9qr9m766w6w418nb490j3mf8a8zw6h";
2096 url = "https://files.pythonhosted.org/packages/b1/53/37d82ab391393565f2f831b8eedbffd57db5a718216f82f1a8b4d381a1c1/urllib3-1.24.1.tar.gz";
2097 sha256 = "08lwd9f3hqznyf32vnzwvp87pchx062nkbgyrf67rwlkgj0jk5fy";
2051 2098 };
2052 2099 meta = {
2053 2100 license = [ pkgs.lib.licenses.mit ];
@@ -36,7 +36,7 b' kombu==4.2.0'
36 36 lxml==4.2.5
37 37 mako==1.0.7
38 38 markdown==2.6.11
39 markupsafe==1.0.0
39 markupsafe==1.1.0
40 40 msgpack-python==0.5.6
41 41 pyotp==2.2.7
42 42 packaging==15.2
@@ -51,7 +51,7 b' pycrypto==2.6.1'
51 51 pycurl==7.43.0.2
52 52 pyflakes==0.8.1
53 53 pygments==2.3.0
54 pyparsing==1.5.7
54 pyparsing==2.3.0
55 55 pyramid-beaker==0.8
56 56 pyramid-debugtoolbar==4.4.0
57 57 pyramid-jinja2==2.7
@@ -79,7 +79,7 b' subprocess32==3.5.2'
79 79 supervisor==3.3.4
80 80 tempita==0.5.2
81 81 translationstring==1.3
82 urllib3==1.21
82 urllib3==1.24.1
83 83 urlobject==2.4.3
84 84 venusian==1.1.0
85 85 weberror==0.10.3
@@ -123,7 +123,7 b' ipdb==0.11.0'
123 123 ipython==5.1.0
124 124
125 125 ## rhodecode-tools, special case
126 https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.0.1.tar.gz?md5=ffb5d6bcb855305b93cfe23ad42e500b#egg=rhodecode-tools==1.0.1
126 https://code.rhodecode.com/rhodecode-tools-ce/archive/v1.1.0.tar.gz?md5=cc320c277cb2add546220290ac9be626#egg=rhodecode-tools==1.1.0
127 127
128 128 ## appenlight
129 129 appenlight-client==0.6.26
@@ -666,8 +666,8 b' class AdminSettingsView(BaseAppView):'
666 666 c = self.load_default_context()
667 667 c.active = 'search'
668 668
669 searcher = searcher_from_config(self.request.registry.settings)
670 c.statistics = searcher.statistics(self.request.translate)
669 c.searcher = searcher_from_config(self.request.registry.settings)
670 c.statistics = c.searcher.statistics(self.request.translate)
671 671
672 672 return self._get_template_context(c)
673 673
@@ -246,9 +246,9 b' class HomeView(BaseAppView):'
246 246 }
247 247 for obj in acl_iter]
248 248
249 def _get_hash_commit_list(self, auth_user, query):
249 def _get_hash_commit_list(self, auth_user, searcher, query):
250 250 org_query = query
251 if not query or len(query) < 3:
251 if not query or len(query) < 3 or not searcher:
252 252 return []
253 253
254 254 commit_hashes = re.compile('(?:commit:)([0-9a-f]{2,40})').findall(query)
@@ -257,9 +257,8 b' class HomeView(BaseAppView):'
257 257 return []
258 258 commit_hash = commit_hashes[0]
259 259
260 searcher = searcher_from_config(self.request.registry.settings)
261 260 result = searcher.search(
262 'commit_id:%s*' % commit_hash, 'commit', auth_user,
261 'commit_id:{}*'.format(commit_hash), 'commit', auth_user,
263 262 raise_on_exc=False)
264 263
265 264 return [
@@ -303,6 +302,84 b' class HomeView(BaseAppView):'
303 302 }
304 303 return data
305 304
    def _get_default_search_queries(self, search_context, searcher, query):
        """
        Build the default "search for `query` ..." suggestion entries for
        the goto-switcher, contextualized to the current repository or
        repository group when the ES6 backend supports scoped queries.

        :param search_context: mapping of ``search_context[...]`` GET params
        :param searcher: search backend; ``None`` disables suggestions
        :param query: raw user query string
        :return: list of suggestion dicts (id, value, value_display,
            type, url)
        """
        if not searcher:
            return []
        is_es_6 = searcher.is_es_6

        queries = []
        # NOTE(review): repo_context is assigned below but never used in
        # this method — presumably reserved for later use; verify.
        repo_group_name, repo_name, repo_context = None, None, None

        # repo group context
        if search_context.get('search_context[repo_group_name]'):
            repo_group_name = search_context.get('search_context[repo_group_name]')
        if search_context.get('search_context[repo_name]'):
            repo_name = search_context.get('search_context[repo_name]')
            repo_context = search_context.get('search_context[repo_view_type]')

        if is_es_6 and repo_name:
            # scoped file-content search within the current repository
            def query_modifier():
                qry = '{} repo_name.raw:{} '.format(
                    query, searcher.escape_specials(repo_name))
                return {'q': qry, 'type': 'content'}
            label = u'Search for `{}` through files in this repository.'.format(query)
            queries.append(
                {
                    'id': -10,
                    'value': query,
                    'value_display': label,
                    'type': 'search',
                    'url': h.route_path(
                        'search_repo', repo_name=repo_name, _query=query_modifier())
                }
            )

            # scoped commit search within the current repository
            # NOTE(review): both entries share id -10 — confirm ids need
            # not be unique for the suggestion widget.
            def query_modifier():
                qry = '{} repo_name.raw:{} '.format(
                    query, searcher.escape_specials(repo_name))
                return {'q': qry, 'type': 'commit'}
            label = u'Search for `{}` through commits in this repository.'.format(query)
            queries.append(
                {
                    'id': -10,
                    'value': query,
                    'value_display': label,
                    'type': 'search',
                    'url': h.route_path(
                        'search_repo', repo_name=repo_name, _query=query_modifier())
                }
            )

        elif is_es_6 and repo_group_name:
            # scoped file-content search across all repos in the group
            def query_modifier():
                qry = '{} repo_name.raw:{} '.format(
                    query, searcher.escape_specials(repo_group_name + '/*'))
                return {'q': qry, 'type': 'content'}
            label = u'Search for `{}` through files in this repository group'.format(query)
            queries.append(
                {
                    'id': -20,
                    'value': query,
                    'value_display': label,
                    'type': 'search',
                    'url': h.route_path('search', _query=query_modifier())
                }
            )

        if not queries:
            # fallback: plain global content search
            queries.append(
                {
                    'id': -1,
                    'value': query,
                    'value_display': u'Search for: `{}`'.format(query),
                    'type': 'search',
                    'url': h.route_path('search',
                                        _query={'q': query, 'type': 'content'})
                }
            )

        return queries
382
306 383 @LoginRequired()
307 384 @view_config(
308 385 route_name='goto_switcher_data', request_method='GET',
@@ -315,26 +392,21 b' class HomeView(BaseAppView):'
315 392 query = self.request.GET.get('query')
316 393 log.debug('generating main filter data, query %s', query)
317 394
318 default_search_val = u'Full text search for: `{}`'.format(query)
319 395 res = []
320 396 if not query:
321 397 return {'suggestions': res}
322 398
323 res.append({
324 'id': -1,
325 'value': query,
326 'value_display': default_search_val,
327 'type': 'search',
328 'url': h.route_path(
329 'search', _query={'q': query})
330 })
331 repo_group_id = safe_int(self.request.GET.get('repo_group_id'))
399 searcher = searcher_from_config(self.request.registry.settings)
400 for _q in self._get_default_search_queries(self.request.GET, searcher, query):
401 res.append(_q)
402
403 repo_group_id = safe_int(self.request.GET.get('search_context[repo_group_id]'))
332 404 if repo_group_id:
333 405 repo_group = RepoGroup.get(repo_group_id)
334 406 composed_hint = '{}/{}'.format(repo_group.group_name, query)
335 407 show_hint = not query.startswith(repo_group.group_name)
336 408 if repo_group and show_hint:
337 hint = u'Group search: `{}`'.format(composed_hint)
409 hint = u'Repository search inside: `{}`'.format(composed_hint)
338 410 res.append({
339 411 'id': -1,
340 412 'value': composed_hint,
@@ -351,7 +423,7 b' class HomeView(BaseAppView):'
351 423 for serialized_repo in repos:
352 424 res.append(serialized_repo)
353 425
354 # TODO(marcink): permissions for that ?
426 # TODO(marcink): should all logged in users be allowed to search others?
355 427 allowed_user_search = self._rhodecode_user.username != User.DEFAULT_USER
356 428 if allowed_user_search:
357 429 users = self._get_user_list(query)
@@ -362,7 +434,7 b' class HomeView(BaseAppView):'
362 434 for serialized_user_group in user_groups:
363 435 res.append(serialized_user_group)
364 436
365 commits = self._get_hash_commit_list(c.auth_user, query)
437 commits = self._get_hash_commit_list(c.auth_user, searcher, query)
366 438 if commits:
367 439 unique_repos = collections.OrderedDict()
368 440 for commit in commits:
@@ -45,11 +45,14 b' def search(request, tmpl_context, repo_n'
45 45 errors = []
46 46 try:
47 47 search_params = schema.deserialize(
48 dict(search_query=request.GET.get('q'),
48 dict(
49 search_query=request.GET.get('q'),
49 50 search_type=request.GET.get('type'),
50 51 search_sort=request.GET.get('sort'),
52 search_max_lines=request.GET.get('max_lines'),
51 53 page_limit=request.GET.get('page_limit'),
52 requested_page=request.GET.get('page'))
54 requested_page=request.GET.get('page'),
55 )
53 56 )
54 57 except validation_schema.Invalid as e:
55 58 errors = e.children
@@ -57,12 +60,13 b' def search(request, tmpl_context, repo_n'
57 60 def url_generator(**kw):
58 61 q = urllib.quote(safe_str(search_query))
59 62 return update_params(
60 "?q=%s&type=%s" % (q, safe_str(search_type)), **kw)
63 "?q=%s&type=%s&max_lines=%s" % (q, safe_str(search_type), search_max_lines), **kw)
61 64
62 65 c = tmpl_context
63 66 search_query = search_params.get('search_query')
64 67 search_type = search_params.get('search_type')
65 68 search_sort = search_params.get('search_sort')
69 search_max_lines = search_params.get('search_max_lines')
66 70 if search_params.get('search_query'):
67 71 page_limit = search_params['page_limit']
68 72 requested_page = search_params['requested_page']
@@ -48,7 +48,6 b' import bleach'
48 48 from datetime import datetime
49 49 from functools import partial
50 50 from pygments.formatters.html import HtmlFormatter
51 from pygments import highlight as code_highlight
52 51 from pygments.lexers import (
53 52 get_lexer_by_name, get_lexer_for_filename, get_lexer_for_mimetype)
54 53
@@ -81,12 +80,14 b' from rhodecode.lib.utils2 import str2boo'
81 80 from rhodecode.lib.markup_renderer import MarkupRenderer, relative_links
82 81 from rhodecode.lib.vcs.exceptions import CommitDoesNotExistError
83 82 from rhodecode.lib.vcs.backends.base import BaseChangeset, EmptyCommit
83 from rhodecode.lib.index.search_utils import get_matching_line_offsets
84 84 from rhodecode.config.conf import DATE_FORMAT, DATETIME_FORMAT
85 85 from rhodecode.model.changeset_status import ChangesetStatusModel
86 86 from rhodecode.model.db import Permission, User, Repository
87 87 from rhodecode.model.repo_group import RepoGroupModel
88 88 from rhodecode.model.settings import IssueTrackerSettingsModel
89 89
90
90 91 log = logging.getLogger(__name__)
91 92
92 93
@@ -260,6 +261,21 b' def files_breadcrumbs(repo_name, commit_'
260 261 return literal('/'.join(url_segments))
261 262
262 263
def code_highlight(code, lexer, formatter, use_hl_filter=False):
    """
    Lex ``code`` with ``lexer`` and format it with the formatter ``formatter``.

    :param use_hl_filter: when True, attach the ElasticSearch highlight
        filter so search-match markers embedded in ``code`` are rendered
        as highlighted tokens.
    :return: the formatted output as a string
    """
    if use_hl_filter:
        # add HL filter
        from rhodecode.lib.index import search_utils
        lexer.add_filter(search_utils.ElasticSearchHLFilter())
    return pygments.format(pygments.lex(code, lexer), formatter)
278
263 279 class CodeHtmlFormatter(HtmlFormatter):
264 280 """
265 281 My code Html Formatter for source codes
@@ -386,110 +402,9 b' class SearchContentCodeHtmlFormatter(Cod'
386 402
387 403 current_line_number += 1
388 404
389
390 405 yield 0, '</table>'
391 406
392 407
393 def extract_phrases(text_query):
394 """
395 Extracts phrases from search term string making sure phrases
396 contained in double quotes are kept together - and discarding empty values
397 or fully whitespace values eg.
398
399 'some text "a phrase" more' => ['some', 'text', 'a phrase', 'more']
400
401 """
402
403 in_phrase = False
404 buf = ''
405 phrases = []
406 for char in text_query:
407 if in_phrase:
408 if char == '"': # end phrase
409 phrases.append(buf)
410 buf = ''
411 in_phrase = False
412 continue
413 else:
414 buf += char
415 continue
416 else:
417 if char == '"': # start phrase
418 in_phrase = True
419 phrases.append(buf)
420 buf = ''
421 continue
422 elif char == ' ':
423 phrases.append(buf)
424 buf = ''
425 continue
426 else:
427 buf += char
428
429 phrases.append(buf)
430 phrases = [phrase.strip() for phrase in phrases if phrase.strip()]
431 return phrases
432
433
434 def get_matching_offsets(text, phrases):
435 """
436 Returns a list of string offsets in `text` that the list of `terms` match
437
438 >>> get_matching_offsets('some text here', ['some', 'here'])
439 [(0, 4), (10, 14)]
440
441 """
442 offsets = []
443 for phrase in phrases:
444 for match in re.finditer(phrase, text):
445 offsets.append((match.start(), match.end()))
446
447 return offsets
448
449
450 def normalize_text_for_matching(x):
451 """
452 Replaces all non alnum characters to spaces and lower cases the string,
453 useful for comparing two text strings without punctuation
454 """
455 return re.sub(r'[^\w]', ' ', x.lower())
456
457
458 def get_matching_line_offsets(lines, terms):
459 """ Return a set of `lines` indices (starting from 1) matching a
460 text search query, along with `context` lines above/below matching lines
461
462 :param lines: list of strings representing lines
463 :param terms: search term string to match in lines eg. 'some text'
464 :param context: number of lines above/below a matching line to add to result
465 :param max_lines: cut off for lines of interest
466 eg.
467
468 text = '''
469 words words words
470 words words words
471 some text some
472 words words words
473 words words words
474 text here what
475 '''
476 get_matching_line_offsets(text, 'text', context=1)
477 {3: [(5, 9)], 6: [(0, 4)]]
478
479 """
480 matching_lines = {}
481 phrases = [normalize_text_for_matching(phrase)
482 for phrase in extract_phrases(terms)]
483
484 for line_index, line in enumerate(lines, start=1):
485 match_offsets = get_matching_offsets(
486 normalize_text_for_matching(line), phrases)
487 if match_offsets:
488 matching_lines[line_index] = match_offsets
489
490 return matching_lines
491
492
493 408 def hsv_to_rgb(h, s, v):
494 409 """ Convert hsv color values to rgb """
495 410
@@ -1904,25 +1819,6 b' def journal_filter_help(request):'
1904 1819 ).format(actions=actions)
1905 1820
1906 1821
1907 def search_filter_help(searcher, request):
1908 _ = request.translate
1909
1910 terms = ''
1911 return _(
1912 'Example filter terms for `{searcher}` search:\n' +
1913 '{terms}\n' +
1914 'Generate wildcards using \'*\' character:\n' +
1915 ' "repo_name:vcs*" - search everything starting with \'vcs\'\n' +
1916 ' "repo_name:*vcs*" - search for repository containing \'vcs\'\n' +
1917 '\n' +
1918 'Optional AND / OR operators in queries\n' +
1919 ' "repo_name:vcs OR repo_name:test"\n' +
1920 ' "owner:test AND repo_name:test*"\n' +
1921 'More: {search_doc}'
1922 ).format(searcher=searcher.name,
1923 terms=terms, search_doc=searcher.query_lang_doc)
1924
1925
1926 1822 def not_mapped_error(repo_name):
1927 1823 from rhodecode.translation import _
1928 1824 flash(_('%s repository is not mapped to db perhaps'
@@ -2107,3 +2003,15 b' def go_import_header(request, db_repo=No'
2107 2003 def reviewer_as_json(*args, **kwargs):
2108 2004 from rhodecode.apps.repository.utils import reviewer_as_json as _reviewer_as_json
2109 2005 return _reviewer_as_json(*args, **kwargs)
2006
2007
2008 def get_repo_view_type(request):
2009 route_name = request.matched_route.name
2010 route_to_view_type = {
2011 'repo_changelog': 'changelog',
2012 'repo_files': 'files',
2013 'repo_summary': 'summary',
2014 'repo_commit': 'commit'
2015
2016 }
2017 return route_to_view_type.get(route_name)
@@ -25,15 +25,27 b' Index schema for RhodeCode'
25 25 import importlib
26 26 import logging
27 27
28 from rhodecode.lib.index.search_utils import normalize_text_for_matching
29
28 30 log = logging.getLogger(__name__)
29 31
30 32 # leave defaults for backward compat
31 33 default_searcher = 'rhodecode.lib.index.whoosh'
32 34 default_location = '%(here)s/data/index'
33 35
36 ES_VERSION_2 = '2'
37 ES_VERSION_6 = '6'
38 # for legacy reasons we keep 2 compat as default
39 DEFAULT_ES_VERSION = ES_VERSION_2
34 40
35 class BaseSearch(object):
41 from rhodecode_tools.lib.fts_index.elasticsearch_engine_6 import \
42 ES_CONFIG # pragma: no cover
43
44
45 class BaseSearcher(object):
36 46 query_lang_doc = ''
47 es_version = None
48 name = None
37 49
38 50 def __init__(self):
39 51 pass
@@ -45,15 +57,42 b' class BaseSearch(object):'
45 57 raise_on_exc=True):
46 58 raise Exception('NotImplemented')
47 59
60 @staticmethod
61 def query_to_mark(query, default_field=None):
62 """
63 Formats the query to mark token for jquery.mark.js highlighting. ES could
64 have a different format optionally.
48 65
49 def searcher_from_config(config, prefix='search.'):
66 :param default_field:
67 :param query:
68 """
69 return ' '.join(normalize_text_for_matching(query).split())
70
71 @property
72 def is_es_6(self):
73 return self.es_version == ES_VERSION_6
74
75 def get_handlers(self):
76 return {}
77
78
79 def search_config(config, prefix='search.'):
50 80 _config = {}
51 81 for key in config.keys():
52 82 if key.startswith(prefix):
53 83 _config[key[len(prefix):]] = config[key]
84 return _config
85
86
87 def searcher_from_config(config, prefix='search.'):
88 _config = search_config(config, prefix)
54 89
55 90 if 'location' not in _config:
56 91 _config['location'] = default_location
92 if 'es_version' not in _config:
93 # use old legacy ES version set to 2
94 _config['es_version'] = '2'
95
57 96 imported = importlib.import_module(_config.get('module', default_searcher))
58 searcher = imported.Search(config=_config)
97 searcher = imported.Searcher(config=_config)
59 98 return searcher
@@ -33,7 +33,7 b' from whoosh.index import create_in, open'
33 33 from whoosh.qparser import QueryParser, QueryParserError
34 34
35 35 import rhodecode.lib.helpers as h
36 from rhodecode.lib.index import BaseSearch
36 from rhodecode.lib.index import BaseSearcher
37 37 from rhodecode.lib.utils2 import safe_unicode
38 38
39 39 log = logging.getLogger(__name__)
@@ -59,13 +59,13 b' FRAGMENTER = ContextFragmenter(200)'
59 59 log = logging.getLogger(__name__)
60 60
61 61
62 class Search(BaseSearch):
62 class WhooshSearcher(BaseSearcher):
63 63 # this also shows in UI
64 64 query_lang_doc = 'http://whoosh.readthedocs.io/en/latest/querylang.html'
65 65 name = 'whoosh'
66 66
67 67 def __init__(self, config):
68 super(Search, self).__init__()
68 super(Searcher, self).__init__()
69 69 self.config = config
70 70 if not os.path.isdir(self.config['location']):
71 71 os.makedirs(self.config['location'])
@@ -162,16 +162,17 b' class Search(BaseSearch):'
162 162 _ = translator
163 163 stats = [
164 164 {'key': _('Index Type'), 'value': 'Whoosh'},
165 {'sep': True},
166
165 167 {'key': _('File Index'), 'value': str(self.file_index)},
166 {'key': _('Indexed documents'),
167 'value': self.file_index.doc_count()},
168 {'key': _('Last update'),
169 'value': h.time_to_datetime(self.file_index.last_modified())},
168 {'key': _('Indexed documents'), 'value': self.file_index.doc_count()},
169 {'key': _('Last update'), 'value': h.time_to_datetime(self.file_index.last_modified())},
170
171 {'sep': True},
172
170 173 {'key': _('Commit index'), 'value': str(self.commit_index)},
171 {'key': _('Indexed documents'),
172 'value': str(self.commit_index.doc_count())},
173 {'key': _('Last update'),
174 'value': h.time_to_datetime(self.commit_index.last_modified())}
174 {'key': _('Indexed documents'), 'value': str(self.commit_index.doc_count())},
175 {'key': _('Last update'), 'value': h.time_to_datetime(self.commit_index.last_modified())}
175 176 ]
176 177 return stats
177 178
@@ -227,6 +228,9 b' class Search(BaseSearch):'
227 228 return self.searcher
228 229
229 230
231 Searcher = WhooshSearcher
232
233
230 234 class WhooshResultWrapper(object):
231 235 def __init__(self, search_type, total_hits, results):
232 236 self.search_type = search_type
@@ -263,6 +267,8 b' class WhooshResultWrapper(object):'
263 267 # TODO: marcink: this feels like an overkill, there's a lot of data
264 268 # inside hit object, and we don't need all
265 269 res = dict(hit)
270 # elastic search uses that, we set it empty so it fallbacks to regular HL logic
271 res['content_highlight'] = ''
266 272
267 273 f_path = '' # pragma: no cover
268 274 if self.search_type in ['content', 'path']:
@@ -1009,3 +1009,14 b' def glob2re(pat):'
1009 1009 else:
1010 1010 res = res + re.escape(c)
1011 1011 return res + '\Z(?ms)'
1012
1013
1014 def parse_byte_string(size_str):
1015 match = re.match(r'(\d+)(MB|KB)', size_str, re.IGNORECASE)
1016 if not match:
1017 raise ValueError('Given size:%s is invalid, please make sure '
1018 'to use format of <num>(MB|KB)' % size_str)
1019
1020 _parts = match.groups()
1021 num, type_ = _parts
1022 return long(num) * {'mb': 1024*1024, 'kb': 1024}[type_.lower()]
@@ -58,7 +58,7 b' def author_name(author):'
58 58 to get the username
59 59 """
60 60
61 if not author or not '@' in author:
61 if not author or '@' not in author:
62 62 return author
63 63 else:
64 64 return author.replace(author_email(author), '').replace('<', '')\
@@ -34,6 +34,9 b' class SearchParamsSchema(colander.Mappin'
34 34 colander.String(),
35 35 missing='newfirst',
36 36 validator=colander.OneOf(['oldfirst', 'newfirst']))
37 search_max_lines = colander.SchemaNode(
38 colander.Integer(),
39 missing=10)
37 40 page_limit = colander.SchemaNode(
38 41 colander.Integer(),
39 42 missing=10,
@@ -572,6 +572,7 b' div.annotatediv { margin-left: 2px; marg'
572 572 .code-highlight, /* TODO: dan: merge codehilite into code-highlight */
573 573 /* This can be generated with `pygmentize -S default -f html` */
574 574 .codehilite {
575 .c-ElasticMatch { background-color: #faffa6; padding: 0.2em;}
575 576 .hll { background-color: #ffffcc }
576 577 .c { color: #408080; font-style: italic } /* Comment */
577 578 .err, .codehilite .err { border: none } /* Error */
@@ -640,6 +641,7 b' div.annotatediv { margin-left: 2px; marg'
640 641 .vi { color: #19177C } /* Name.Variable.Instance */
641 642 .vm { color: #19177C } /* Name.Variable.Magic */
642 643 .il { color: #666666 } /* Literal.Number.Integer.Long */
644
643 645 }
644 646
645 647 /* customized pre blocks for markdown/rst */
@@ -166,7 +166,6 b' small,'
166 166
167 167 mark,
168 168 .mark {
169 background-color: @rclightblue;
170 169 padding: .2em;
171 170 }
172 171
@@ -5,8 +5,13 b''
5 5 <div class="panel-body">
6 6 <dl class="dl-horizontal">
7 7 % for stat in c.statistics:
8 % if stat.get('sep'):
9 <dt></dt>
10 <dd>--</dd>
11 % else:
8 12 <dt>${stat['key']}</dt>
9 13 <dd>${stat['value']}</dd>
14 % endif
10 15 % endfor
11 16 </dl>
12 17 </div>
@@ -7,9 +7,12 b" go_import_header = ''"
7 7 if hasattr(c, 'rhodecode_db_repo'):
8 8 c.template_context['repo_type'] = c.rhodecode_db_repo.repo_type
9 9 c.template_context['repo_landing_commit'] = c.rhodecode_db_repo.landing_rev[1]
10 ## check repo context
11 c.template_context['repo_view_type'] = h.get_repo_view_type(request)
10 12
11 13 if getattr(c, 'repo_group', None):
12 14 c.template_context['repo_group_id'] = c.repo_group.group_id
15 c.template_context['repo_group_name'] = c.repo_group.group_name
13 16
14 17 if getattr(c, 'rhodecode_user', None) and c.rhodecode_user.user_id:
15 18 c.template_context['rhodecode_user']['username'] = c.rhodecode_user.username
@@ -23,6 +26,12 b" c.template_context['default_user'] = {"
23 26 'username': h.DEFAULT_USER,
24 27 'user_id': 1
25 28 }
29 c.template_context['search_context'] = {
30 'repo_group_id': c.template_context.get('repo_group_id'),
31 'repo_group_name': c.template_context.get('repo_group_name'),
32 'repo_name': c.template_context.get('repo_name'),
33 'repo_view_type': c.template_context.get('repo_view_type'),
34 }
26 35
27 36 %>
28 37 <html xmlns="http://www.w3.org/1999/xhtml">
@@ -18,10 +18,7 b''
18 18 %else:
19 19 ${_('Search inside all accessible repositories')}
20 20 %endif
21 %if c.cur_query:
22 &raquo;
23 ${c.cur_query}
24 %endif
21
25 22 </%def>
26 23
27 24 <%def name="menu_bar_nav()">
@@ -59,7 +56,8 b''
59 56 <div class="fields">
60 57 ${h.text('q', c.cur_query, placeholder="Enter query...")}
61 58
62 ${h.select('type',c.search_type,[('content',_('File contents')), ('commit',_('Commit messages')), ('path',_('File names')),],id='id_search_type')}
59 ${h.select('type',c.search_type,[('content',_('Files')), ('path',_('File path')),('commit',_('Commits'))],id='id_search_type')}
60 ${h.hidden('max_lines', '10')}
63 61 <input type="submit" value="${_('Search')}" class="btn"/>
64 62 <br/>
65 63
@@ -72,8 +70,54 b''
72 70 </span>
73 71 % endfor
74 72 <div class="field">
75 <p class="filterexample" style="position: inherit" onclick="$('#search-help').toggle()">${_('Example Queries')}</p>
76 <pre id="search-help" style="display: none">${h.tooltip(h.search_filter_help(c.searcher, request))}</pre>
73 <p class="filterexample" style="position: inherit" onclick="$('#search-help').toggle()">${_('Query Langague examples')}</p>
74 <pre id="search-help" style="display: none">\
75
76 % if c.searcher.name == 'whoosh':
77 Example filter terms for `Whoosh` search:
78 query lang: <a href="${c.searcher.query_lang_doc}">Whoosh Query Language</a>
79 Whoosh has limited query capabilities. For advanced search use ElasticSearch 6 from RhodeCode EE edition.
80
81 Generate wildcards using '*' character:
82 "repo_name:vcs*" - search everything starting with 'vcs'
83 "repo_name:*vcs*" - search for repository containing 'vcs'
84
85 Optional AND / OR operators in queries
86 "repo_name:vcs OR repo_name:test"
87 "owner:test AND repo_name:test*" AND extension:py
88
89 Move advanced search is available via ElasticSearch6 backend in EE edition.
90 % elif c.searcher.name == 'elasticsearch' and c.searcher.es_version == '2':
91 Example filter terms for `ElasticSearch-${c.searcher.es_version}`search:
92 ElasticSearch-2 has limited query capabilities. For advanced search use ElasticSearch 6 from RhodeCode EE edition.
93
94 search type: content (File Content)
95 indexed fields: content
96
97 # search for `fix` string in all files
98 fix
99
100 search type: commit (Commit message)
101 indexed fields: message
102
103 search type: path (File name)
104 indexed fields: path
105
106 % else:
107 Example filter terms for `ElasticSearch-${c.searcher.es_version}`search:
108 query lang: <a href="${c.searcher.query_lang_doc}">ES 6 Query Language</a>
109 The reserved characters needed espace by `\`: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
110 % for handler in c.searcher.get_handlers().values():
111
112 search type: ${handler.search_type_label}
113 *indexed fields*: ${', '.join( [('\n ' if x[0]%4==0 else '')+x[1] for x in enumerate(handler.es_6_field_names)])}
114 % for entry in handler.es_6_example_queries:
115 ${entry.rstrip()}
116 % endfor
117 % endfor
118
119 % endif
120 </pre>
77 121 </div>
78 122
79 123 <div class="field">${c.runtime}</div>
@@ -96,6 +140,7 b''
96 140 </div>
97 141 <script>
98 142 $(document).ready(function(){
143 $('#q').autoGrowInput();
99 144 $("#id_search_type").select2({
100 145 'containerCssClass': "drop-menu",
101 146 'dropdownCssClass': "drop-menu-dropdown",
@@ -1,5 +1,7 b''
1 1 <%namespace name="base" file="/base/base.mako"/>
2 2
3 % if c.formatted_results:
4
3 5 <table class="rctable search-results">
4 6 <tr>
5 7 <th>${_('Repository')}</th>
@@ -50,14 +52,20 b''
50 52 </td>
51 53
52 54 <td class="td-user author">
53 ${base.gravatar_with_user(entry['author'])}
55 <%
56 ## es6 stores this as object
57 author = entry['author']
58 if isinstance(author, dict):
59 author = author['email']
60 %>
61 ${base.gravatar_with_user(author)}
54 62 </td>
55 63 </tr>
56 64 % endif
57 65 %endfor
58 66 </table>
59 67
60 %if c.cur_query and c.formatted_results:
68 %if c.cur_query:
61 69 <div class="pagination-wh pagination-left">
62 70 ${c.formatted_results.pager('$link_previous ~2~ $link_next')}
63 71 </div>
@@ -79,4 +87,16 b''
79 87 target_expand.addClass('open');
80 88 }
81 89 });
90
91 $(".message.td-description").mark(
92 "${c.searcher.query_to_mark(c.cur_query, 'message')}",
93 {
94 "className": 'match',
95 "accuracy": "complementary",
96 "ignorePunctuation": ":._(){}[]!'+=".split("")
97 }
98 );
99
82 100 </script>
101
102 % endif
@@ -1,33 +1,10 b''
1 <%def name="highlight_text_file(terms, text, url, line_context=3,
2 max_lines=10,
3 mimetype=None, filepath=None)">
4 <%
5 lines = text.split('\n')
6 lines_of_interest = set()
7 matching_lines = h.get_matching_line_offsets(lines, terms)
8 shown_matching_lines = 0
9 1
10 for line_number in matching_lines:
11 if len(lines_of_interest) < max_lines:
12 lines_of_interest |= set(range(
13 max(line_number - line_context, 0),
14 min(line_number + line_context, len(lines) + 1)))
15 shown_matching_lines += 1
16
17 %>
18 ${h.code_highlight(
19 text,
20 h.get_lexer_safe(
21 mimetype=mimetype,
22 filepath=filepath,
23 ),
24 h.SearchContentCodeHtmlFormatter(
25 linenos=True,
26 cssclass="code-highlight",
27 url=url,
28 query_terms=terms,
29 only_line_numbers=lines_of_interest
30 ))|n}
2 <%def name="highlight_text_file(has_matched_content, file_content, lexer, html_formatter, matching_lines, shown_matching_lines, url, use_hl_filter)">
3 % if has_matched_content:
4 ${h.code_highlight(file_content, lexer, html_formatter, use_hl_filter=use_hl_filter)|n}
5 % else:
6 ${_('No content matched')} <br/>
7 % endif
31 8
32 9 %if len(matching_lines) > shown_matching_lines:
33 10 <a href="${url}">
@@ -37,12 +14,52 b' for line_number in matching_lines:'
37 14 </%def>
38 15
39 16 <div class="search-results">
17 <% query_mark = c.searcher.query_to_mark(c.cur_query, 'content') %>
18
40 19 %for entry in c.formatted_results:
20
21 <%
22 file_content = entry['content_highlight'] or entry['content']
23 mimetype = entry.get('mimetype')
24 filepath = entry.get('path')
25 max_lines = h.safe_int(request.GET.get('max_lines', '10'))
26 line_context = h.safe_int(request.GET.get('line_contenxt', '3'))
27
28 match_file_url=h.route_path('repo_files',repo_name=entry['repository'], commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path'], _query={"mark": query_mark})
29 terms = c.cur_query
30
31 if c.searcher.is_es_6:
32 # use empty terms so we default to markers usage
33 total_lines, matching_lines = h.get_matching_line_offsets(file_content, terms=None)
34 else:
35 total_lines, matching_lines = h.get_matching_line_offsets(file_content, terms)
36
37 shown_matching_lines = 0
38 lines_of_interest = set()
39 for line_number in matching_lines:
40 if len(lines_of_interest) < max_lines:
41 lines_of_interest |= set(range(
42 max(line_number - line_context, 0),
43 min(line_number + line_context, total_lines + 1)))
44 shown_matching_lines += 1
45 lexer = h.get_lexer_safe(mimetype=mimetype, filepath=filepath)
46
47 html_formatter = h.SearchContentCodeHtmlFormatter(
48 linenos=True,
49 cssclass="code-highlight",
50 url=match_file_url,
51 query_terms=terms,
52 only_line_numbers=lines_of_interest
53 )
54
55 has_matched_content = len(lines_of_interest) >= 1
56
57 %>
41 58 ## search results are additionally filtered, and this check is just a safe gate
42 59 % if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(entry['repository'], 'search results content check'):
43 60 <div id="codeblock" class="codeblock">
44 61 <div class="codeblock-header">
45 <h2>
62 <h1>
46 63 %if h.get_repo_type_by_name(entry.get('repository')) == 'hg':
47 64 <i class="icon-hg"></i>
48 65 %elif h.get_repo_type_by_name(entry.get('repository')) == 'git':
@@ -51,18 +68,39 b' for line_number in matching_lines:'
51 68 <i class="icon-svn"></i>
52 69 %endif
53 70 ${h.link_to(entry['repository'], h.route_path('repo_summary',repo_name=entry['repository']))}
54 </h2>
71 </h1>
72
55 73 <div class="stats">
74 <span class="stats-filename">
75 <strong>
76 <i class="icon-file-text"></i>
56 77 ${h.link_to(h.literal(entry['f_path']), h.route_path('repo_files',repo_name=entry['repository'],commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path']))}
78 </strong>
79 </span>
80 <span class="item last"><i class="tooltip icon-clipboard clipboard-action" data-clipboard-text="${entry['f_path']}" title="${_('Copy the full path')}"></i></span>
81 <br/>
82 <span class="stats-first-item">
83 ${len(matching_lines)} ${_ungettext('search match', 'search matches', len(matching_lines))}
84 </span>
85
86 <span >
57 87 %if entry.get('lines'):
58 88 | ${entry.get('lines', 0.)} ${_ungettext('line', 'lines', entry.get('lines', 0.))}
59 89 %endif
90 </span>
91
92 <span>
60 93 %if entry.get('size'):
61 94 | ${h.format_byte_size_binary(entry['size'])}
62 95 %endif
96 </span>
97
98 <span>
63 99 %if entry.get('mimetype'):
64 100 | ${entry.get('mimetype', "unknown mimetype")}
65 101 %endif
102 </span>
103
66 104 </div>
67 105 <div class="buttons">
68 106 <a id="file_history_overview_full" href="${h.route_path('repo_changelog_file',repo_name=entry.get('repository',''),commit_id=entry.get('commit_id', 'tip'),f_path=entry.get('f_path',''))}">
@@ -74,10 +112,19 b' for line_number in matching_lines:'
74 112 </div>
75 113 </div>
76 114 <div class="code-body search-code-body">
77 ${highlight_text_file(c.cur_query, entry['content'],
78 url=h.route_path('repo_files',repo_name=entry['repository'],commit_id=entry.get('commit_id', 'tip'),f_path=entry['f_path']),
79 mimetype=entry.get('mimetype'), filepath=entry.get('path'))}
115
116 ${highlight_text_file(
117 has_matched_content=has_matched_content,
118 file_content=file_content,
119 lexer=lexer,
120 html_formatter=html_formatter,
121 matching_lines=matching_lines,
122 shown_matching_lines=shown_matching_lines,
123 url=match_file_url,
124 use_hl_filter=c.searcher.is_es_6
125 )}
80 126 </div>
127
81 128 </div>
82 129 % endif
83 130 %endfor
@@ -91,10 +138,14 b' for line_number in matching_lines:'
91 138 %if c.cur_query:
92 139 <script type="text/javascript">
93 140 $(function(){
94 $(".code").mark(
95 '${' '.join(h.normalize_text_for_matching(c.cur_query).split())}',
96 {"className": 'match',
97 });
141 $(".search-code-body").mark(
142 "${query_mark}",
143 {
144 "className": 'match',
145 "accuracy": "complementary",
146 "ignorePunctuation": ":._(){}[]!'+=".split("")
147 }
148 );
98 149 })
99 150 </script>
100 %endif No newline at end of file
151 %endif
@@ -1,3 +1,5 b''
1 % if c.formatted_results:
2
1 3 <table class="rctable search-results">
2 4 <tr>
3 5 <th>${_('Repository')}</th>
@@ -27,8 +29,10 b''
27 29 %endfor
28 30 </table>
29 31
30 %if c.cur_query and c.formatted_results:
32 %if c.cur_query:
31 33 <div class="pagination-wh pagination-left">
32 34 ${c.formatted_results.pager('$link_previous ~2~ $link_next')}
33 35 </div>
34 %endif No newline at end of file
36 %endif
37
38 % endif
@@ -208,44 +208,3 b' def test_get_visual_attr(baseapp):'
208 208 def test_chop_at(test_text, inclusive, expected_text):
209 209 assert helpers.chop_at_smart(
210 210 test_text, '\n', inclusive, '...') == expected_text
211
212
213 @pytest.mark.parametrize('test_text, expected_output', [
214 ('some text', ['some', 'text']),
215 ('some text', ['some', 'text']),
216 ('some text "with a phrase"', ['some', 'text', 'with a phrase']),
217 ('"a phrase" "another phrase"', ['a phrase', 'another phrase']),
218 ('"justphrase"', ['justphrase']),
219 ('""', []),
220 ('', []),
221 (' ', []),
222 ('" "', []),
223 ])
224 def test_extract_phrases(test_text, expected_output):
225 assert helpers.extract_phrases(test_text) == expected_output
226
227
228 @pytest.mark.parametrize('test_text, text_phrases, expected_output', [
229 ('some text here', ['some', 'here'], [(0, 4), (10, 14)]),
230 ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]),
231 ('irrelevant', ['not found'], []),
232 ('irrelevant', ['not found'], []),
233 ])
234 def test_get_matching_offsets(test_text, text_phrases, expected_output):
235 assert helpers.get_matching_offsets(
236 test_text, text_phrases) == expected_output
237
238
239 def test_normalize_text_for_matching():
240 assert helpers.normalize_text_for_matching(
241 'OJjfe)*#$*@)$JF*)3r2f80h') == 'ojjfe jf 3r2f80h'
242
243
244 def test_get_matching_line_offsets():
245 assert helpers.get_matching_line_offsets([
246 'words words words',
247 'words words words',
248 'some text some',
249 'words words words',
250 'words words words',
251 'text here what'], 'text') == {3: [(5, 9)], 6: [(0, 4)]}
General Comments 0
You need to be logged in to leave comments. Login now