# HG changeset patch # User RhodeCode Admin # Date 2023-08-25 12:20:05 # Node ID ef80e1c5655c9024b83c272825cedce90ca87d44 # Parent 5ce7d4ae1aa2a5666e2c3bfa32f55bcd5f0e8195 code: fixes to escape characters improperly used diff --git a/rhodecode/lib/diffs.py b/rhodecode/lib/diffs.py --- a/rhodecode/lib/diffs.py +++ b/rhodecode/lib/diffs.py @@ -816,7 +816,7 @@ class DiffProcessor(object): return b''.join(raw_diff), chunks, stats def _safe_id(self, idstring): - """Make a string safe for including in an id attribute. + r"""Make a string safe for including in an id attribute. The HTML spec says that id attributes 'must begin with a letter ([A-Za-z]) and may be followed by any number @@ -828,8 +828,8 @@ class DiffProcessor(object): Whitespace is transformed into underscores, and then anything which is not a hyphen or a character that matches \w (alphanumerics and underscore) is removed. + """ - """ # Transform all whitespace to underscore idstring = re.sub(r'\s', "_", f'{idstring}') # Remove everything that is not a hyphen or a member of \w diff --git a/rhodecode/lib/helpers.py b/rhodecode/lib/helpers.py --- a/rhodecode/lib/helpers.py +++ b/rhodecode/lib/helpers.py @@ -1038,33 +1038,71 @@ def gravatar_with_user(request, author, return _render('gravatar_with_user', author, show_disabled=show_disabled, tooltip=tooltip) -tags_paterns = OrderedDict(( - ('lang', (re.compile(r'\[(lang|language)\ \=\>\ *([a-zA-Z\-\/\#\+\.]*)\]'), - '
\\2
')), - - ('see', (re.compile(r'\[see\ \=\>\ *([a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\]'), - '
see: \\1
')), - - ('url', (re.compile(r'\[url\ \=\>\ \[([a-zA-Z0-9\ \.\-\_]+)\]\((http://|https://|/)(.*?)\)\]'), - '
\\1
')), - - ('license', (re.compile(r'\[license\ \=\>\ *([a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\]'), - '
\\1
')), - - ('ref', (re.compile(r'\[(requires|recommends|conflicts|base)\ \=\>\ *([a-zA-Z0-9\-\/]*)\]'), - '
\\1: \\2
')), - - ('state', (re.compile(r'\[(stable|featured|stale|dead|dev|deprecated)\]'), - '
\\1
')), - - # label in grey - ('label', (re.compile(r'\[([a-z]+)\]'), - '
\\1
')), - - # generic catch all in grey - ('generic', (re.compile(r'\[([a-zA-Z0-9\.\-\_]+)\]'), - '
\\1
')), -)) +tags_patterns = OrderedDict( + ( + ( + "lang", + ( + re.compile(r"\[(lang|language)\ \=\>\ *([a-zA-Z\-\/\#\+\.]*)\]"), + '
\\2
', + ), + ), + ( + "see", + ( + re.compile(r"\[see\ \=\>\ *([a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\]"), + '
see: \\1
', + ), + ), + ( + "url", + ( + re.compile( + r"\[url\ \=\>\ \[([a-zA-Z0-9\ \.\-\_]+)\]\((http://|https://|/)(.*?)\)\]" + ), + '
\\1
', + ), + ), + ( + "license", + ( + re.compile( + r"\[license\ \=\>\ *([a-zA-Z0-9\/\=\?\&\ \:\/\.\-]*)\]" + ), + r'
\\1
', + ), + ), + ( + "ref", + ( + re.compile( + r"\[(requires|recommends|conflicts|base)\ \=\>\ *([a-zA-Z0-9\-\/]*)\]" + ), + '
\\1: \\2
', + ), + ), + ( + "state", + ( + re.compile(r"\[(stable|featured|stale|dead|dev|deprecated)\]"), + '
\\1
', + ), + ), + # label in grey + ( + "label", + (re.compile(r"\[([a-z]+)\]"), '
\\1
'), + ), + # generic catch all in grey + ( + "generic", + ( + re.compile(r"\[([a-zA-Z0-9\.\-\_]+)\]"), + '
\\1
', + ), + ), + ) +) def extract_metatags(value): @@ -1075,7 +1113,7 @@ def extract_metatags(value): if not value: return tags, '' - for key, val in list(tags_paterns.items()): + for key, val in list(tags_patterns.items()): pat, replace_html = val tags.extend([(key, x.group()) for x in pat.finditer(value)]) value = pat.sub('', value) @@ -1091,7 +1129,7 @@ def style_metatag(tag_type, value): return '' html_value = value - tag_data = tags_paterns.get(tag_type) + tag_data = tags_patterns.get(tag_type) if tag_data: pat, replace_html = tag_data # convert to plain `str` instead of a markup tag to be used in @@ -1530,7 +1568,7 @@ def urlify_text(text_, safe=True, **href """ url_pat = re.compile(r'''(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@#.&+]''' - '''|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)''') + r'''|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)''') def url_func(match_obj): url_full = match_obj.groups()[0] diff --git a/rhodecode/lib/index/search_utils.py b/rhodecode/lib/index/search_utils.py --- a/rhodecode/lib/index/search_utils.py +++ b/rhodecode/lib/index/search_utils.py @@ -148,7 +148,7 @@ def normalize_text_for_matching(x): Replaces all non alfanum characters to spaces and lower cases the string, useful for comparing two text strings without punctuation """ - return re.sub(r'[^\w]', ' ', x.lower()) + return re.sub(r'\W', ' ', x.lower()) def get_matching_line_offsets(lines, terms=None, markers=None): diff --git a/rhodecode/tests/lib/test_helpers.py b/rhodecode/tests/lib/test_helpers.py --- a/rhodecode/tests/lib/test_helpers.py +++ b/rhodecode/tests/lib/test_helpers.py @@ -28,14 +28,14 @@ from rhodecode.tests import no_newline_i @pytest.mark.parametrize('url, expected_url', [ - ('http://rc.com', 'http://rc.com'), - ('http://rc.com/test', 'http://rc.com/test'), - ('http://rc.com/!foo', 'http://rc.com/!foo'), - ('http://rc.com/&foo', 'http://rc.com/&foo'), - ('http://rc.com/?foo-1&bar=1', 'http://rc.com/?foo-1&bar=1'), - ('http://rc.com?foo-1&bar=1', 'http://rc.com?foo-1&bar=1'), - ('http://rc.com/#foo', 'http://rc.com/#foo'), - ('http://rc.com/@foo', 'http://rc.com/@foo'), + (r'https://rc.com', 'http://rc.com'), + (r'https://rc.com/test', 'https://rc.com/test'), + (r'https://rc.com/!foo', 'https://rc.com/!foo'), + (r'https://rc.com/&foo', 'https://rc.com/&foo'), + (r'https://rc.com/?foo-1&bar=1', 'https://rc.com/?foo-1&bar=1'), + (r'https://rc.com?foo-1&bar=1', 'https://rc.com?foo-1&bar=1'), + (r'https://rc.com/#foo', 'https://rc.com/#foo'), + (r'https://rc.com/@foo', 'https://rc.com/@foo'), ]) def test_urlify_text(url, expected_url): assert helpers.urlify_text(url) == expected_url @@ -95,12 +95,12 @@ def test_format_binary(): @pytest.mark.parametrize('text_string, pattern, expected', [ - ('No issue here', '(?:#)(?P\d+)', []), + ('No issue here', r'(?:#)(?P\d+)', []), ('Fix #42', '(?:#)(?P\d+)', - [{'url': 'http://r.io/{repo}/i/42', 'id': '42'}]), + [{'url': 'https://r.io/{repo}/i/42', 'id': '42'}]), ('Fix #42, #53', '(?:#)(?P\d+)', [ - {'url': 'http://r.io/{repo}/i/42', 'id': '42'}, - {'url': 'http://r.io/{repo}/i/53', 'id': '53'}]), + {'url': 'https://r.io/{repo}/i/42', 'id': '42'}, + {'url': 'https://r.io/{repo}/i/53', 'id': '53'}]), ('Fix #42', '(?:#)?\d+)', []), # Broken regex ]) def test_extract_issues(backend, text_string, pattern, expected): @@ -109,7 +109,7 @@ def test_extract_issues(backend, text_st '123': { 'uid': '123', 'pat': pattern, - 'url': 'http://r.io/${repo}/i/${issue_id}', + 'url': r'https://r.io/${repo}/i/${issue_id}', 'pref': '#', 'desc': 'Test Pattern' } diff --git a/rhodecode/tests/models/schemas/test_schema_types.py b/rhodecode/tests/models/schemas/test_schema_types.py --- a/rhodecode/tests/models/schemas/test_schema_types.py +++ b/rhodecode/tests/models/schemas/test_schema_types.py @@ -36,29 +36,29 @@ class TestGroupNameType(object): assert result == expected @pytest.mark.parametrize('given, expected', [ - ('//group1/group2//', 'group1/group2'), - ('//group1///group2//', 'group1/group2'), - ('group1/group2///group3', 'group1/group2/group3'), - ('v1.2', 'v1.2'), - ('/v1.2', 'v1.2'), - ('.dirs', '.dirs'), - ('..dirs', '.dirs'), - ('./..dirs', '.dirs'), - ('dir/;name;/;[];/sub', 'dir/name/sub'), - (',/,/,d,,,', 'd'), - ('/;/#/,d,,,', 'd'), - ('long../../..name', 'long./.name'), - ('long../..name', 'long./.name'), - ('../', ''), - ('\'../"../', ''), - ('c,/,/..//./,c,,,/.d/../.........c', 'c/c/.d/.c'), - ('c,/,/..//./,c,,,', 'c/c'), - ('d../..d', 'd./.d'), - ('d../../d', 'd./d'), + (r'//group1/group2//', 'group1/group2'), + (r'//group1///group2//', 'group1/group2'), + (r'group1/group2///group3', 'group1/group2/group3'), + (r'v1.2', 'v1.2'), + (r'/v1.2', 'v1.2'), + (r'.dirs', '.dirs'), + (r'..dirs', '.dirs'), + (r'./..dirs', '.dirs'), + (r'dir/;name;/;[];/sub', 'dir/name/sub'), + (r',/,/,d,,,', 'd'), + (r'/;/#/,d,,,', 'd'), + (r'long../../..name', 'long./.name'), + (r'long../..name', 'long./.name'), + (r'../', ''), + (r'\'../"../', ''), + (r'c,/,/..//./,c,,,/.d/../.........c', 'c/c/.d/.c'), + (r'c,/,/..//./,c,,,', 'c/c'), + (r'd../..d', 'd./.d'), + (r'd../../d', 'd./d'), - ('d\;\./\,\./d', 'd./d'), - ('d\.\./\.\./d', 'd./d'), - ('d\.\./\..\../d', 'd./d'), + (r'd\;\./\,\./d', 'd./d'), + (r'd\.\./\.\./d', 'd./d'), + (r'd\.\./\..\../d', 'd./d'), ]) def test_deserialize_clean_up_name(self, given, expected): class TestSchema(colander.Schema):