diff --git a/rhodecode/lib/diffs.py b/rhodecode/lib/diffs.py --- a/rhodecode/lib/diffs.py +++ b/rhodecode/lib/diffs.py @@ -816,7 +816,7 @@ class DiffProcessor(object): return b''.join(raw_diff), chunks, stats def _safe_id(self, idstring): - """Make a string safe for including in an id attribute. + r"""Make a string safe for including in an id attribute. The HTML spec says that id attributes 'must begin with a letter ([A-Za-z]) and may be followed by any number @@ -828,8 +828,8 @@ class DiffProcessor(object): Whitespace is transformed into underscores, and then anything which is not a hyphen or a character that matches \w (alphanumerics and underscore) is removed. + """ - """ # Transform all whitespace to underscore idstring = re.sub(r'\s', "_", f'{idstring}') # Remove everything that is not a hyphen or a member of \w diff --git a/rhodecode/lib/helpers.py b/rhodecode/lib/helpers.py --- a/rhodecode/lib/helpers.py +++ b/rhodecode/lib/helpers.py @@ -1038,33 +1038,71 @@ def gravatar_with_user(request, author, return _render('gravatar_with_user', author, show_disabled=show_disabled, tooltip=tooltip) -tags_paterns = OrderedDict(( - ('lang', (re.compile(r'\[(lang|language)\ \=\>\ *([a-zA-Z\-\/\#\+\.]*)\]'), - '
')), - - ('see', (re.compile(r'\[see\ \=\>\ *([a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\]'), - ' ')), - - ('url', (re.compile(r'\[url\ \=\>\ \[([a-zA-Z0-9\ \.\-\_]+)\]\((http://|https://|/)(.*?)\)\]'), - ' ')), - - ('license', (re.compile(r'\[license\ \=\>\ *([a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\]'), - ' ')), - - ('ref', (re.compile(r'\[(requires|recommends|conflicts|base)\ \=\>\ *([a-zA-Z0-9\-\/]*)\]'), - ' ')), - - ('state', (re.compile(r'\[(stable|featured|stale|dead|dev|deprecated)\]'), - ' ')), - - # label in grey - ('label', (re.compile(r'\[([a-z]+)\]'), - ' ')), - - # generic catch all in grey - ('generic', (re.compile(r'\[([a-zA-Z0-9\.\-\_]+)\]'), - ' ')), -)) +tags_patterns = OrderedDict( + ( + ( + "lang", + ( + re.compile(r"\[(lang|language)\ \=\>\ *([a-zA-Z\-\/\#\+\.]*)\]"), + ' ', + ), + ), + ( + "see", + ( + re.compile(r"\[see\ \=\>\ *([a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\]"), + ' ', + ), + ), + ( + "url", + ( + re.compile( + r"\[url\ \=\>\ \[([a-zA-Z0-9\ \.\-\_]+)\]\((http://|https://|/)(.*?)\)\]" + ), + ' ', + ), + ), + ( + "license", + ( + re.compile( + r"\[license\ \=\>\ *([a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\]" + ), + r' ', + ), + ), + ( + "ref", + ( + re.compile( + r"\[(requires|recommends|conflicts|base)\ \=\>\ *([a-zA-Z0-9\-\/]*)\]" + ), + ' ', + ), + ), + ( + "state", + ( + re.compile(r"\[(stable|featured|stale|dead|dev|deprecated)\]"), + ' ', + ), + ), + # label in grey + ( + "label", + (re.compile(r"\[([a-z]+)\]"), ' '), + ), + # generic catch all in grey + ( + "generic", + ( + re.compile(r"\[([a-zA-Z0-9\.\-\_]+)\]"), + ' ', + ), + ), + ) +) def extract_metatags(value): @@ -1075,7 +1113,7 @@ def extract_metatags(value): if not value: return tags, '' - for key, val in list(tags_paterns.items()): + for key, val in list(tags_patterns.items()): pat, replace_html = val tags.extend([(key, x.group()) for x in pat.finditer(value)]) value = pat.sub('', value) @@ -1091,7 +1129,7 @@ def style_metatag(tag_type, value): return '' html_value = value - tag_data = tags_paterns.get(tag_type) + tag_data = tags_patterns.get(tag_type) if tag_data: pat, replace_html = tag_data # convert to plain `str` instead of a markup tag to be used in @@ -1530,7 +1568,7 @@ def urlify_text(text_, safe=True, **href """ url_pat = re.compile(r'''(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@#.&+]''' - '''|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)''') + r'''|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)''') def url_func(match_obj): url_full = match_obj.groups()[0] diff --git a/rhodecode/lib/index/search_utils.py b/rhodecode/lib/index/search_utils.py --- a/rhodecode/lib/index/search_utils.py +++ b/rhodecode/lib/index/search_utils.py @@ -148,7 +148,7 @@ def normalize_text_for_matching(x): Replaces all non alfanum characters to spaces and lower cases the string, useful for comparing two text strings without punctuation """ - return re.sub(r'[^\w]', ' ', x.lower()) + return re.sub(r'\W', ' ', x.lower()) def get_matching_line_offsets(lines, terms=None, markers=None): diff --git a/rhodecode/tests/lib/test_helpers.py b/rhodecode/tests/lib/test_helpers.py --- a/rhodecode/tests/lib/test_helpers.py +++ b/rhodecode/tests/lib/test_helpers.py @@ -28,14 +28,14 @@ from rhodecode.tests import no_newline_i @pytest.mark.parametrize('url, expected_url', [ - ('http://rc.com', 'http://rc.com'), - ('http://rc.com/test', 'http://rc.com/test'), - ('http://rc.com/!foo', 'http://rc.com/!foo'), - ('http://rc.com/&foo', 'http://rc.com/&foo'), - ('http://rc.com/?foo-1&bar=1', 'http://rc.com/?foo-1&bar=1'), - ('http://rc.com?foo-1&bar=1', 'http://rc.com?foo-1&bar=1'), - ('http://rc.com/#foo', 'http://rc.com/#foo'), - ('http://rc.com/@foo', 'http://rc.com/@foo'), + (r'https://rc.com', 'http://rc.com'), + (r'https://rc.com/test', 'https://rc.com/test'), + (r'https://rc.com/!foo', 'https://rc.com/!foo'), + (r'https://rc.com/&foo', 'https://rc.com/&foo'), + (r'https://rc.com/?foo-1&bar=1', 'https://rc.com/?foo-1&bar=1'), + (r'https://rc.com?foo-1&bar=1', 'https://rc.com?foo-1&bar=1'), + (r'https://rc.com/#foo', 'https://rc.com/#foo'), + (r'https://rc.com/@foo', 'https://rc.com/@foo'), ]) def test_urlify_text(url, expected_url): assert helpers.urlify_text(url) == expected_url @@ -95,12 +95,12 @@ def test_format_binary(): @pytest.mark.parametrize('text_string, pattern, expected', [ - ('No issue here', '(?:#)(?P