Show More
@@ -0,0 +1,38 b'' | |||
|
1 | ||
|
2 | # Copyright (C) 2010-2023 RhodeCode GmbH | |
|
3 | # | |
|
4 | # This program is free software: you can redistribute it and/or modify | |
|
5 | # it under the terms of the GNU Affero General Public License, version 3 | |
|
6 | # (only), as published by the Free Software Foundation. | |
|
7 | # | |
|
8 | # This program is distributed in the hope that it will be useful, | |
|
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
|
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
|
11 | # GNU General Public License for more details. | |
|
12 | # | |
|
13 | # You should have received a copy of the GNU Affero General Public License | |
|
14 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
|
15 | # | |
|
16 | # This program is dual-licensed. If you wish to learn more about the | |
|
17 | # RhodeCode Enterprise Edition, including its added features, Support services, | |
|
18 | # and proprietary license terms, please see https://rhodecode.com/licenses/ | |
|
19 | ||
|
20 | import pytest | |
|
21 | ||
|
22 | from rhodecode.lib.html_filters import sanitize_html | |
|
23 | ||
|
24 | ||
|
@pytest.mark.parametrize(
    "src_html, expected_html",
    [
        # allowed tag passes through untouched
        ('<div>ITEM</div>', '<div>ITEM</div>'),
        # HTML comments are preserved (sanitize_html passes strip_comments=False)
        ('<div>ITEM</div> <!-- comment here -->', '<div>ITEM</div> <!-- comment here -->'),
        # CSS properties outside the whitelist are emptied out
        ('<div style="not-allowed:true">ITEM</div>', '<div style="">ITEM</div>'),
        # event-handler attributes are dropped
        ('<div onload="ACTION">ITEM</div>', '<div>ITEM</div>'),
        ('<a onload="ACTION" style="color:red">ITEM</a>', '<a style="color:red;">ITEM</a>'),
        # closing tag of a void element is removed
        ('<img src="/file.png"></img>', '<img src="/file.png">'),
    ])
def test_html_sanitizer_options(src_html, expected_html):
    """Check that ``sanitize_html`` turns ``src_html`` into ``expected_html``."""
    parsed_html = sanitize_html(src_html)
    assert parsed_html == expected_html
@@ -44,7 +44,6 b' from collections import OrderedDict' | |||
|
44 | 44 | import pygments |
|
45 | 45 | import itertools |
|
46 | 46 | import fnmatch |
|
47 | import bleach | |
|
48 | 47 | |
|
49 | 48 | from datetime import datetime |
|
50 | 49 | from functools import partial |
@@ -78,6 +77,7 b' from webhelpers2.number import format_by' | |||
|
78 | 77 | from rhodecode.lib._vendor.webhelpers_backports import raw_select |
|
79 | 78 | |
|
80 | 79 | from rhodecode.lib.action_parser import action_parser |
|
80 | from rhodecode.lib.html_filters import sanitize_html | |
|
81 | 81 | from rhodecode.lib.pagination import Page, RepoPage, SqlPage |
|
82 | 82 | from rhodecode.lib import ext_json |
|
83 | 83 | from rhodecode.lib.ext_json import json |
@@ -1645,7 +1645,7 b' def _process_url_func(match_obj, repo_na' | |||
|
1645 | 1645 | 'id-repr': issue_id, |
|
1646 | 1646 | 'issue-prefix': entry['pref'], |
|
1647 | 1647 | 'serv': entry['url'], |
|
1648 |
'title': |
|
|
1648 | 'title': sanitize_html(desc, strip=True), | |
|
1649 | 1649 | 'hovercard_url': hovercard_url |
|
1650 | 1650 | } |
|
1651 | 1651 |
@@ -16,8 +16,47 b'' | |||
|
16 | 16 | # RhodeCode Enterprise Edition, including its added features, Support services, |
|
17 | 17 | # and proprietary license terms, please see https://rhodecode.com/licenses/ |
|
18 | 18 | |
|
19 | import functools | |
|
20 | import logging | |
|
21 | from .html_sanitizer_defs import markdown_attrs, markdown_tags, all_tags, all_styles | |
|
22 | ||
|
23 | ||
|
24 | log = logging.getLogger(__name__) | |
|
25 | ||
|
26 | ||
|
# base64 filter, e.g. ${ example | base64,n }
def base64(text):
    """
    Template filter: pass ``text`` to
    ``rhodecode.lib.str_utils.base64_to_str`` and return its result.
    """
    # resolved at call time rather than at module import
    from rhodecode.lib import str_utils
    return str_utils.base64_to_str(text)
|
23 | 31 | |
|
32 | ||
|
def sanitize_html(text, **kwargs):
    """
    Clean ``text`` with ``bleach.clean`` using the project whitelists.

    :param text: HTML markup to sanitize.
    :param kwargs: ``markdown`` (default ``False``) switches the tag whitelist
        from ``all_tags`` to ``markdown_tags``; ``strip_comments`` defaults to
        ``False`` and may be overridden; every other keyword is forwarded
        unchanged to ``bleach.clean`` (e.g. ``strip=True``).
    :return: the sanitized markup, or a fixed placeholder string when
        ``bleach.clean`` raises.
    """
    # TODO: replace this with https://nh3.readthedocs.io/en/latest
    # bleach is abandoned and deprecated :/
    import bleach
    from bleach.css_sanitizer import CSSSanitizer

    css_sanitizer = CSSSanitizer(allowed_css_properties=all_styles)

    markdown = kwargs.pop('markdown', False)

    # both modes share the same attribute whitelist; only the tags differ
    allowed_tags = markdown_tags if markdown else all_tags

    # keep HTML comments unless the caller explicitly asks otherwise; as a
    # default (not a hard-coded keyword) it no longer collides with **kwargs
    kwargs.setdefault('strip_comments', False)

    cleaner = functools.partial(bleach.clean,
                                tags=allowed_tags,
                                attributes=markdown_attrs,
                                css_sanitizer=css_sanitizer,
                                **kwargs)

    try:
        return cleaner(text)
    except Exception:
        # deliberate best-effort: log the failure and return a safe placeholder
        log.exception('Failed to sanitize html')
        return 'TEXT CANNOT BE PARSED USING HTML SANITIZE'
@@ -62,7 +62,8 b' markdown_tags = [' | |||
|
62 | 62 | "a", |
|
63 | 63 | "input", |
|
64 | 64 | "details", |
|
65 | "summary" | |
|
65 | "summary", | |
|
66 | "div" | |
|
66 | 67 | ] |
|
67 | 68 | |
|
68 | 69 | markdown_attrs = { |
@@ -28,7 +28,6 b' import os' | |||
|
28 | 28 | import lxml |
|
29 | 29 | import logging |
|
30 | 30 | import urllib.parse |
|
31 | import bleach | |
|
32 | 31 | import pycmarkgfm |
|
33 | 32 | |
|
34 | 33 | from mako.lookup import TemplateLookup |
@@ -40,7 +39,7 b' from docutils import writers' | |||
|
40 | 39 | from docutils.writers import html4css1 |
|
41 | 40 | import markdown |
|
42 | 41 | |
|
43 |
from rhodecode.lib.utils2 import safe_str, |
|
|
42 | from rhodecode.lib.utils2 import safe_str, MENTIONS_REGEX | |
|
44 | 43 | |
|
45 | 44 | log = logging.getLogger(__name__) |
|
46 | 45 | |
@@ -271,17 +270,8 b' class MarkupRenderer(object):' | |||
|
271 | 270 | |
|
272 | 271 | @classmethod |
|
273 | 272 | def sanitize_html(cls, text): |
|
274 | # TODO: replace this with https://nh3.readthedocs.io/en/latest | |
|
275 | # bleach is abandoned and deprecated :/ | |
|
276 | ||
|
277 | from .bleach_whitelist import markdown_attrs, markdown_tags | |
|
278 | allowed_tags = markdown_tags | |
|
279 | allowed_attrs = markdown_attrs | |
|
280 | ||
|
281 | try: | |
|
282 | return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs) | |
|
283 | except Exception: | |
|
284 | return 'TEXT CANNOT BE PARSED USING SANITIZE' | |
|
273 | from .html_filters import sanitize_html | |
|
274 | return sanitize_html(text, markdown=True) | |
|
285 | 275 | |
|
286 | 276 | @classmethod |
|
287 | 277 | def renderer_from_filename(cls, filename, exclude): |
@@ -21,13 +21,13 b' import re' | |||
|
21 | 21 | import logging |
|
22 | 22 | import time |
|
23 | 23 | import functools |
|
24 | import bleach | |
|
25 | 24 | from collections import namedtuple |
|
26 | 25 | |
|
27 | 26 | from pyramid.threadlocal import get_current_request |
|
28 | 27 | |
|
29 | 28 | from rhodecode.lib import rc_cache |
|
30 | 29 | from rhodecode.lib.hash_utils import sha1_safe |
|
30 | from rhodecode.lib.html_filters import sanitize_html | |
|
31 | 31 | from rhodecode.lib.utils2 import ( |
|
32 | 32 | Optional, AttributeDict, safe_str, remove_prefix, str2bool) |
|
33 | 33 | from rhodecode.lib.vcs.backends import base |
@@ -376,7 +376,7 b' class IssueTrackerSettingsModel(object):' | |||
|
376 | 376 | |
|
377 | 377 | def url_cleaner(input_str): |
|
378 | 378 | input_str = input_str.replace('"', '').replace("'", '') |
|
379 |
input_str = |
|
|
379 | input_str = sanitize_html(input_str, strip=True) | |
|
380 | 380 | return input_str |
|
381 | 381 | |
|
382 | 382 | # populate |
@@ -394,7 +394,7 b' class IssueTrackerSettingsModel(object):' | |||
|
394 | 394 | 'pat_compiled': pat_compiled, |
|
395 | 395 | 'url': url_cleaner( |
|
396 | 396 | qs.get(self._get_keyname('url', uid, 'rhodecode_')) or ''), |
|
397 |
'pref': |
|
|
397 | 'pref': sanitize_html( | |
|
398 | 398 | qs.get(self._get_keyname('pref', uid, 'rhodecode_')) or ''), |
|
399 | 399 | 'desc': qs.get( |
|
400 | 400 | self._get_keyname('desc', uid, 'rhodecode_')), |
General Comments 0
You need to be logged in to leave comments.
Login now