# HG changeset patch # User Daniel Dourvaris # Date 2016-05-30 04:15:12 # Node ID 5de64805f24055b925d36214cb38435458eb6a95 # Parent 6c3a05927ae4168ffbd912419c302844916f139c search: add syntax highlighting, line numbers and line context to file content search results diff --git a/Gruntfile.js b/Gruntfile.js --- a/Gruntfile.js +++ b/Gruntfile.js @@ -18,6 +18,7 @@ module.exports = function(grunt) { '<%= dirs.js.src %>/bootstrap.js', '<%= dirs.js.src %>/mousetrap.js', '<%= dirs.js.src %>/moment.js', + '<%= dirs.js.src %>/moment.js', '<%= dirs.js.src %>/appenlight-client-0.4.1.min.js', // Plugins @@ -27,12 +28,13 @@ module.exports = function(grunt) { '<%= dirs.js.src %>/plugins/jquery.auto-grow-input.js', '<%= dirs.js.src %>/plugins/jquery.autocomplete.js', '<%= dirs.js.src %>/plugins/jquery.debounce.js', + '<%= dirs.js.src %>/plugins/jquery.mark.js', '<%= dirs.js.src %>/plugins/jquery.timeago.js', '<%= dirs.js.src %>/plugins/jquery.timeago-extension.js', // Select2 '<%= dirs.js.src %>/select2/select2.js', - + // Code-mirror '<%= dirs.js.src %>/codemirror/codemirror.js', '<%= dirs.js.src %>/codemirror/codemirror_loadmode.js', diff --git a/rhodecode/controllers/search.py b/rhodecode/controllers/search.py --- a/rhodecode/controllers/search.py +++ b/rhodecode/controllers/search.py @@ -79,7 +79,8 @@ class SearchController(BaseRepoControlle try: search_result = searcher.search( - search_query, search_type, c.perm_user, repo_name) + search_query, search_type, c.perm_user, repo_name, + requested_page, page_limit) formatted_results = Page( search_result['results'], page=requested_page, diff --git a/rhodecode/lib/helpers.py b/rhodecode/lib/helpers.py --- a/rhodecode/lib/helpers.py +++ b/rhodecode/lib/helpers.py @@ -36,11 +36,14 @@ import urlparse import time import string import hashlib +import pygments from datetime import datetime from functools import partial from pygments.formatters.html import HtmlFormatter from pygments import highlight as code_highlight +from pygments.lexers import ( + get_lexer_by_name, get_lexer_for_filename, get_lexer_for_mimetype) from pylons import url from pylons.i18n.translation import _, ungettext from pyramid.threadlocal import get_current_request @@ -307,6 +310,176 @@ class CodeHtmlFormatter(HtmlFormatter): yield 0, '' +class SearchContentCodeHtmlFormatter(CodeHtmlFormatter): + def __init__(self, **kw): + # only show these line numbers if set + self.only_lines = kw.pop('only_line_numbers', []) + self.query_terms = kw.pop('query_terms', []) + self.max_lines = kw.pop('max_lines', 5) + self.line_context = kw.pop('line_context', 3) + self.url = kw.pop('url', None) + + super(CodeHtmlFormatter, self).__init__(**kw) + + def _wrap_code(self, source): + for cnt, it in enumerate(source): + i, t = it + t = '
<pre>%s</pre>' % t
+            yield i, t
+
+    def _wrap_tablelinenos(self, inner):
+        yield 0, '<table class="%stable">' % self.cssclass
+
+        last_shown_line_number = 0
+        current_line_number = 1
+
+        for t, line in inner:
+            if not t:
+                yield t, line
+                continue
+
+            if current_line_number in self.only_lines:
+                if last_shown_line_number + 1 != current_line_number:
+                    yield 0, '<tr>'
+                    yield 0, '<td class="line">...</td>'
+                    yield 0, '<td class="code"></td>'
+                    yield 0, '</tr>'
+
+                yield 0, '<tr>'
+                if self.url:
+                    yield 0, '<td class="line"><a href="%s#L%i">%i</a></td>' % (
+                        self.url, current_line_number, current_line_number)
+                else:
+                    yield 0, '<td class="line">%i</td>' % (
+                        current_line_number)
+                yield 0, '<td class="code">' + line + '</td>'
+                yield 0, '</tr>'
+
+                last_shown_line_number = current_line_number
+
+            current_line_number += 1
+
+        yield 0, '</table>
' + + +def extract_phrases(text_query): + """ + Extracts phrases from search term string making sure phrases + contained in double quotes are kept together - and discarding empty values + or fully whitespace values eg. + + 'some text "a phrase" more' => ['some', 'text', 'a phrase', 'more'] + + """ + + in_phrase = False + buf = '' + phrases = [] + for char in text_query: + if in_phrase: + if char == '"': # end phrase + phrases.append(buf) + buf = '' + in_phrase = False + continue + else: + buf += char + continue + else: + if char == '"': # start phrase + in_phrase = True + phrases.append(buf) + buf = '' + continue + elif char == ' ': + phrases.append(buf) + buf = '' + continue + else: + buf += char + + phrases.append(buf) + phrases = [phrase.strip() for phrase in phrases if phrase.strip()] + return phrases + + +def get_matching_offsets(text, phrases): + """ + Returns a list of string offsets in `text` that the list of `terms` match + + >>> get_matching_offsets('some text here', ['some', 'here']) + [(0, 4), (10, 14)] + + """ + offsets = [] + for phrase in phrases: + for match in re.finditer(phrase, text): + offsets.append((match.start(), match.end())) + + return offsets + + +def normalize_text_for_matching(x): + """ + Replaces all non alnum characters to spaces and lower cases the string, + useful for comparing two text strings without punctuation + """ + return re.sub(r'[^\w]', ' ', x.lower()) + + +def get_matching_line_offsets(lines, terms): + """ Return a set of `lines` indices (starting from 1) matching a + text search query, along with `context` lines above/below matching lines + + :param lines: list of strings representing lines + :param terms: search term string to match in lines eg. 'some text' + :param context: number of lines above/below a matching line to add to result + :param max_lines: cut off for lines of interest + eg. 
+ + >>> get_matching_line_offsets(''' +words words words +words words words +some text some +words words words +words words words +text here what +''', 'text', context=1) + {3: [(5, 9)], 6: [(0, 4)]] + """ + matching_lines = {} + phrases = [normalize_text_for_matching(phrase) + for phrase in extract_phrases(terms)] + + for line_index, line in enumerate(lines, start=1): + match_offsets = get_matching_offsets( + normalize_text_for_matching(line), phrases) + if match_offsets: + matching_lines[line_index] = match_offsets + + return matching_lines + +def get_lexer_safe(mimetype=None, filepath=None): + """ + Tries to return a relevant pygments lexer using mimetype/filepath name, + defaulting to plain text if none could be found + """ + lexer = None + try: + if mimetype: + lexer = get_lexer_for_mimetype(mimetype) + if not lexer: + lexer = get_lexer_for_filename(path) + except pygments.util.ClassNotFound: + pass + + if not lexer: + lexer = get_lexer_by_name('text') + + return lexer + + def pygmentize(filenode, **kwargs): """ pygmentize function using pygments diff --git a/rhodecode/lib/index/whoosh.py b/rhodecode/lib/index/whoosh.py --- a/rhodecode/lib/index/whoosh.py +++ b/rhodecode/lib/index/whoosh.py @@ -90,7 +90,8 @@ class Search(BaseSearch): if self.searcher: self.searcher.close() - def search(self, query, document_type, search_user, repo_name=None): + def search(self, query, document_type, search_user, repo_name=None, + requested_page=1, page_limit=10): log.debug(u'QUERY: %s on %s', query, document_type) result = { 'results': [], diff --git a/rhodecode/public/css/code-block.less b/rhodecode/public/css/code-block.less --- a/rhodecode/public/css/code-block.less +++ b/rhodecode/public/css/code-block.less @@ -514,6 +514,26 @@ div.search-code-body { .match { background-color: #faffa6;} .break { display: block; width: 100%; background-color: #DDE7EF; color: #747474; } } + .code-highlighttable { + border-collapse: collapse; + + tr:hover { + background: #fafafa; + } + td.code { + padding-left: 10px; + } + td.line { + border-right: 1px solid #ccc !important; + padding-right: 10px; + text-align: right; + font-family: "Lucida Console",Monaco,monospace; + span { + white-space: pre-wrap; + color: #666666; + } + } + } } div.annotatediv { margin-left: 2px; margin-right: 4px; } diff --git a/rhodecode/public/js/src/plugins/jquery.mark.js b/rhodecode/public/js/src/plugins/jquery.mark.js new file mode 100755 --- /dev/null +++ b/rhodecode/public/js/src/plugins/jquery.mark.js @@ -0,0 +1,490 @@ +/*!*************************************************** + * mark.js v6.1.0 + * https://github.com/julmot/mark.js + * Copyright (c) 2014–2016, Julian Motz + * Released under the MIT license https://git.io/vwTVl + *****************************************************/ + +"use strict"; + +var _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; }; + +var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if 
(staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); + +var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol ? "symbol" : typeof obj; }; + +function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } + +(function (factory, window, document) { + if (typeof define === "function" && define.amd) { + define(["jquery"], function (jQuery) { + return factory(window, document, jQuery); + }); + } else if ((typeof exports === "undefined" ? "undefined" : _typeof(exports)) === "object") { + factory(window, document, require("jquery")); + } else { + factory(window, document, jQuery); + } +})(function (window, document, $) { + var Mark = function () { + function Mark(ctx) { + _classCallCheck(this, Mark); + + this.ctx = ctx; + } + + _createClass(Mark, [{ + key: "log", + value: function log(msg) { + var level = arguments.length <= 1 || arguments[1] === undefined ? "debug" : arguments[1]; + + var log = this.opt.log; + if (!this.opt.debug) { + return; + } + if ((typeof log === "undefined" ? "undefined" : _typeof(log)) === "object" && typeof log[level] === "function") { + log[level]("mark.js: " + msg); + } + } + }, { + key: "escapeStr", + value: function escapeStr(str) { + return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); + } + }, { + key: "createRegExp", + value: function createRegExp(str) { + str = this.escapeStr(str); + if (Object.keys(this.opt.synonyms).length) { + str = this.createSynonymsRegExp(str); + } + if (this.opt.diacritics) { + str = this.createDiacriticsRegExp(str); + } + str = this.createAccuracyRegExp(str); + return str; + } + }, { + key: "createSynonymsRegExp", + value: function createSynonymsRegExp(str) { + var syn = this.opt.synonyms; + for (var index in syn) { + if (syn.hasOwnProperty(index)) { + var value = syn[index], + k1 = this.escapeStr(index), + k2 = this.escapeStr(value); + str = str.replace(new RegExp("(" + k1 + "|" + k2 + ")", "gmi"), "(" + k1 + "|" + k2 + ")"); + } + } + return str; + } + }, { + key: "createDiacriticsRegExp", + value: function createDiacriticsRegExp(str) { + var dct = ["aÀÁÂÃÄÅàáâãäåĀāąĄ", "cÇçćĆčČ", "dđĐďĎ", "eÈÉÊËèéêëěĚĒēęĘ", "iÌÍÎÏìíîïĪī", "lłŁ", "nÑñňŇńŃ", "oÒÓÔÕÕÖØòóôõöøŌō", "rřŘ", "sŠšśŚ", "tťŤ", "uÙÚÛÜùúûüůŮŪū", "yŸÿýÝ", "zŽžżŻźŹ"]; + var handled = []; + str.split("").forEach(function (ch) { + dct.every(function (dct) { + if (dct.indexOf(ch) !== -1) { + if (handled.indexOf(dct) > -1) { + return false; + } + + str = str.replace(new RegExp("[" + dct + "]", "gmi"), "[" + dct + "]"); + handled.push(dct); + } + return true; + }); + }); + return str; + } + }, { + key: "createAccuracyRegExp", + value: function createAccuracyRegExp(str) { + switch (this.opt.accuracy) { + case "partially": + return "()(" + str + ")"; + case "complementary": + return "()(\\S*" + str + "\\S*)"; + case "exactly": + return "(^|\\s)(" + str + ")(?=\\s|$)"; + } + } + }, { + key: "getSeparatedKeywords", + value: function getSeparatedKeywords(sv) { + var _this = this; + + var stack = []; + sv.forEach(function (kw) { + if (!_this.opt.separateWordSearch) { + if (kw.trim()) { + stack.push(kw); + } + } else { + kw.split(" ").forEach(function (kwSplitted) { + if (kwSplitted.trim()) { + stack.push(kwSplitted); + } + }); + } + }); + return { + "keywords": stack, + "length": stack.length + }; + } + 
}, { + key: "getElements", + value: function getElements() { + var ctx = void 0, + stack = []; + if (typeof this.ctx === "undefined") { + ctx = []; + } else if (this.ctx instanceof HTMLElement) { + ctx = [this.ctx]; + } else if (Array.isArray(this.ctx)) { + ctx = this.ctx; + } else { + ctx = Array.prototype.slice.call(this.ctx); + } + ctx.forEach(function (ctx) { + stack.push(ctx); + var childs = ctx.querySelectorAll("*"); + if (childs.length) { + stack = stack.concat(Array.prototype.slice.call(childs)); + } + }); + if (!ctx.length) { + this.log("Empty context", "warn"); + } + return { + "elements": stack, + "length": stack.length + }; + } + }, { + key: "matches", + value: function matches(el, selector) { + return (el.matches || el.matchesSelector || el.msMatchesSelector || el.mozMatchesSelector || el.webkitMatchesSelector || el.oMatchesSelector).call(el, selector); + } + }, { + key: "matchesFilter", + value: function matchesFilter(el, exclM) { + var _this2 = this; + + var remain = true; + var fltr = this.opt.filter.concat(["script", "style", "title"]); + if (!this.opt.iframes) { + fltr = fltr.concat(["iframe"]); + } + if (exclM) { + fltr = fltr.concat(["*[data-markjs='true']"]); + } + fltr.every(function (filter) { + if (_this2.matches(el, filter)) { + return remain = false; + } + return true; + }); + return !remain; + } + }, { + key: "onIframeReady", + value: function onIframeReady(ifr, successFn, errorFn) { + try { + (function () { + var ifrWin = ifr.contentWindow, + bl = "about:blank", + compl = "complete"; + var callCallback = function callCallback() { + try { + if (ifrWin.document === null) { + throw new Error("iframe inaccessible"); + } + successFn(ifrWin.document); + } catch (e) { + errorFn(); + } + }; + var isBlank = function isBlank() { + var src = ifr.getAttribute("src").trim(), + href = ifrWin.location.href; + return href === bl && src !== bl && src; + }; + var observeOnload = function observeOnload() { + var listener = function listener() { + try { + if (!isBlank()) { + ifr.removeEventListener("load", listener); + callCallback(); + } + } catch (e) { + errorFn(); + } + }; + ifr.addEventListener("load", listener); + }; + if (ifrWin.document.readyState === compl) { + if (isBlank()) { + observeOnload(); + } else { + callCallback(); + } + } else { + observeOnload(); + } + })(); + } catch (e) { + errorFn(); + } + } + }, { + key: "forEachElementInIframe", + value: function forEachElementInIframe(ifr, cb) { + var _this3 = this; + + var end = arguments.length <= 2 || arguments[2] === undefined ? function () {} : arguments[2]; + + var open = 0; + var checkEnd = function checkEnd() { + if (--open < 1) { + end(); + } + }; + this.onIframeReady(ifr, function (con) { + var stack = Array.prototype.slice.call(con.querySelectorAll("*")); + if ((open = stack.length) === 0) { + checkEnd(); + } + stack.forEach(function (el) { + if (el.tagName.toLowerCase() === "iframe") { + (function () { + var j = 0; + _this3.forEachElementInIframe(el, function (iel, len) { + cb(iel, len); + if (len - 1 === j) { + checkEnd(); + } + j++; + }, checkEnd); + })(); + } else { + cb(el, stack.length); + checkEnd(); + } + }); + }, function () { + var src = ifr.getAttribute("src"); + _this3.log("iframe '" + src + "' could not be accessed", "warn"); + checkEnd(); + }); + } + }, { + key: "forEachElement", + value: function forEachElement(cb) { + var _this4 = this; + + var end = arguments.length <= 1 || arguments[1] === undefined ? 
function () {} : arguments[1]; + var exclM = arguments.length <= 2 || arguments[2] === undefined ? true : arguments[2]; + + var _getElements = this.getElements(); + + var stack = _getElements.elements; + var open = _getElements.length; + + var checkEnd = function checkEnd() { + if (--open === 0) { + end(); + } + }; + checkEnd(++open); + stack.forEach(function (el) { + if (!_this4.matchesFilter(el, exclM)) { + if (el.tagName.toLowerCase() === "iframe") { + _this4.forEachElementInIframe(el, function (iel) { + if (!_this4.matchesFilter(iel, exclM)) { + cb(iel); + } + }, checkEnd); + return; + } else { + cb(el); + } + } + checkEnd(); + }); + } + }, { + key: "forEachNode", + value: function forEachNode(cb) { + var end = arguments.length <= 1 || arguments[1] === undefined ? function () {} : arguments[1]; + + this.forEachElement(function (n) { + for (n = n.firstChild; n; n = n.nextSibling) { + if (n.nodeType === 3 && n.textContent.trim()) { + cb(n); + } + } + }, end); + } + }, { + key: "wrapMatches", + value: function wrapMatches(node, regex, custom, cb) { + var hEl = !this.opt.element ? "mark" : this.opt.element, + index = custom ? 0 : 2; + var match = void 0; + while ((match = regex.exec(node.textContent)) !== null) { + var pos = match.index; + if (!custom) { + pos += match[index - 1].length; + } + var startNode = node.splitText(pos); + + node = startNode.splitText(match[index].length); + if (startNode.parentNode !== null) { + var repl = document.createElement(hEl); + repl.setAttribute("data-markjs", "true"); + if (this.opt.className) { + repl.setAttribute("class", this.opt.className); + } + repl.textContent = match[index]; + startNode.parentNode.replaceChild(repl, startNode); + cb(repl); + } + regex.lastIndex = 0; + } + } + }, { + key: "unwrapMatches", + value: function unwrapMatches(node) { + var parent = node.parentNode; + var docFrag = document.createDocumentFragment(); + while (node.firstChild) { + docFrag.appendChild(node.removeChild(node.firstChild)); + } + parent.replaceChild(docFrag, node); + parent.normalize(); + } + }, { + key: "markRegExp", + value: function markRegExp(regexp, opt) { + var _this5 = this; + + this.opt = opt; + this.log("Searching with expression \"" + regexp + "\""); + var found = false; + var eachCb = function eachCb(element) { + found = true; + _this5.opt.each(element); + }; + this.forEachNode(function (node) { + _this5.wrapMatches(node, regexp, true, eachCb); + }, function () { + if (!found) { + _this5.opt.noMatch(regexp); + } + _this5.opt.complete(); + _this5.opt.done(); + }); + } + }, { + key: "mark", + value: function mark(sv, opt) { + var _this6 = this; + + this.opt = opt; + sv = typeof sv === "string" ? 
[sv] : sv; + + var _getSeparatedKeywords = this.getSeparatedKeywords(sv); + + var kwArr = _getSeparatedKeywords.keywords; + var kwArrLen = _getSeparatedKeywords.length; + + if (kwArrLen === 0) { + this.opt.complete(); + this.opt.done(); + } + kwArr.forEach(function (kw) { + var regex = new RegExp(_this6.createRegExp(kw), "gmi"), + found = false; + var eachCb = function eachCb(element) { + found = true; + _this6.opt.each(element); + }; + _this6.log("Searching with expression \"" + regex + "\""); + _this6.forEachNode(function (node) { + _this6.wrapMatches(node, regex, false, eachCb); + }, function () { + if (!found) { + _this6.opt.noMatch(kw); + } + if (kwArr[kwArrLen - 1] === kw) { + _this6.opt.complete(); + _this6.opt.done(); + } + }); + }); + } + }, { + key: "unmark", + value: function unmark(opt) { + var _this7 = this; + + this.opt = opt; + var sel = this.opt.element ? this.opt.element : "*"; + sel += "[data-markjs]"; + if (this.opt.className) { + sel += "." + this.opt.className; + } + this.log("Removal selector \"" + sel + "\""); + this.forEachElement(function (el) { + if (_this7.matches(el, sel)) { + _this7.unwrapMatches(el); + } + }, function () { + _this7.opt.complete(); + _this7.opt.done(); + }, false); + } + }, { + key: "opt", + set: function set(val) { + this._opt = _extends({}, { + "element": "", + "className": "", + "filter": [], + "iframes": false, + "separateWordSearch": true, + "diacritics": true, + "synonyms": {}, + "accuracy": "partially", + "each": function each() {}, + "noMatch": function noMatch() {}, + "done": function done() {}, + "complete": function complete() {}, + "debug": false, + "log": window.console + }, val); + }, + get: function get() { + return this._opt; + } + }]); + + return Mark; + }(); + + $.fn.mark = function (sv, opt) { + new Mark(this).mark(sv, opt); + return this; + }; + $.fn.markRegExp = function (regexp, opt) { + new Mark(this).markRegExp(regexp, opt); + return this; + }; + $.fn.unmark = function (opt) { + new Mark(this).unmark(opt); + return this; + }; +}, window, document); diff --git a/rhodecode/templates/search/search_commit.html b/rhodecode/templates/search/search_commit.html --- a/rhodecode/templates/search/search_commit.html +++ b/rhodecode/templates/search/search_commit.html @@ -33,7 +33,7 @@ - %if entry['message_hl']: + %if entry.get('message_hl'): ${h.literal(entry['message_hl'])} %else: ${h.urlify_commit_message(entry['message'], entry['repository'])} diff --git a/rhodecode/templates/search/search_content.html b/rhodecode/templates/search/search_content.html --- a/rhodecode/templates/search/search_content.html +++ b/rhodecode/templates/search/search_content.html @@ -1,3 +1,40 @@ +<%def name="highlight_text_file(terms, text, url, line_context=3, + max_lines=10, + mimetype=None, filepath=None)"> +<% +lines = text.split('\n') +lines_of_interest = set() +matching_lines = h.get_matching_line_offsets(lines, terms) +shown_matching_lines = 0 + +for line_number in matching_lines: + if len(lines_of_interest) < max_lines: + lines_of_interest |= set(range( + max(line_number - line_context, 0), + min(line_number + line_context, len(lines)))) + shown_matching_lines += 1 + +%> +${h.code_highlight( + text, + h.get_lexer_safe( + mimetype=mimetype, + filepath=filepath, + ), + h.SearchContentCodeHtmlFormatter( + linenos=True, + cssclass="code-highlight", + url=url, + query_terms=terms, + only_line_numbers=lines_of_interest +))|n} +%if len(matching_lines) > shown_matching_lines: + + ${len(matching_lines) - shown_matching_lines} ${_('more matches in this 
file')}
+%endif
+
+</%def>
%for entry in c.formatted_results: ## search results are additionally filtered, and this check is just a safe gate @@ -29,7 +66,7 @@
${_('Show Full History')} - | + | ${h.link_to(_('Annotation'), h.url('files_annotate_home', repo_name=entry.get('repository',''),revision=entry.get('commit_id', 'tip'),f_path=entry.get('f_path','')))} | ${h.link_to(_('Raw'), h.url('files_raw_home', repo_name=entry.get('repository',''),revision=entry.get('commit_id', 'tip'),f_path=entry.get('f_path','')))} | @@ -38,8 +75,10 @@
-
${h.literal(entry['content_short_hl'])}
-
+ ${highlight_text_file(c.cur_query, entry['content'], + url=h.url('files_home',repo_name=entry['repository'],revision=entry.get('commit_id', 'tip'),f_path=entry['f_path']), + mimetype=entry.get('mimetype'), filepath=entry.get('path'))} + % endif %endfor @@ -49,3 +88,14 @@ ${c.formatted_results.pager('$link_previous ~2~ $link_next')} %endif + +%if c.cur_query: + +%endif \ No newline at end of file diff --git a/rhodecode/tests/lib/test_helpers.py b/rhodecode/tests/lib/test_helpers.py --- a/rhodecode/tests/lib/test_helpers.py +++ b/rhodecode/tests/lib/test_helpers.py @@ -155,3 +155,42 @@ def test_get_visual_attr(pylonsapp): def test_chop_at(test_text, inclusive, expected_text): assert helpers.chop_at_smart( test_text, '\n', inclusive, '...') == expected_text + + +@pytest.mark.parametrize('test_text, expected_output', [ + ('some text', ['some', 'text']), + ('some text', ['some', 'text']), + ('some text "with a phrase"', ['some', 'text', 'with a phrase']), + ('"a phrase" "another phrase"', ['a phrase', 'another phrase']), + ('"justphrase"', ['justphrase']), + ('""', []), + ('', []), + (' ', []), + ('" "', []), +]) +def test_extract_phrases(test_text, expected_output): + assert helpers.extract_phrases(test_text) == expected_output + + +@pytest.mark.parametrize('test_text, text_phrases, expected_output', [ + ('some text here', ['some', 'here'], [(0, 4), (10, 14)]), + ('here here there', ['here'], [(0, 4), (5, 9), (11, 15)]), + ('irrelevant', ['not found'], []), + ('irrelevant', ['not found'], []), +]) +def test_get_matching_offsets(test_text, text_phrases, expected_output): + assert helpers.get_matching_offsets( + test_text, text_phrases) == expected_output + +def test_normalize_text_for_matching(): + assert helpers.normalize_text_for_matching( + 'OJjfe)*#$*@)$JF*)3r2f80h') == 'ojjfe jf 3r2f80h' + +def test_get_matching_line_offsets(): + assert helpers.get_matching_line_offsets([ + 'words words words', + 'words words words', + 'some text some', + 'words words words', + 'words words words', + 'text here what'], 'text') == {3: [(5, 9)], 6: [(0, 4)]} \ No newline at end of file
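
Not part of the patch itself: a minimal usage sketch of the query helpers this changeset adds to rhodecode/lib/helpers.py, showing how a raw search string becomes the per-line match offsets that SearchContentCodeHtmlFormatter and search_content.html consume. The import path is simply the module the patch touches; the query and line values are made up for illustration.

    # Sketch only - assumes the helpers behave exactly as defined in the diff above.
    from rhodecode.lib.helpers import (
        extract_phrases, normalize_text_for_matching, get_matching_line_offsets)

    query = 'some text "a phrase"'
    lines = ['words words words', 'some text some', 'a phrase lives here']

    extract_phrases(query)
    # ['some', 'text', 'a phrase'] -- quoted phrases are kept as single terms
    normalize_text_for_matching('Some-Text!')
    # 'some text ' -- punctuation replaced by spaces, everything lower-cased
    get_matching_line_offsets(lines, query)
    # {2: [(0, 4), (10, 14), (5, 9)], 3: [(0, 8)]}
    # keys are 1-based line numbers (the template derives only_line_numbers from
    # them), values are (start, end) character offsets of each match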