Show More
@@ -0,0 +1,38 b'' | |||||
|
1 | ||||
|
2 | # Copyright (C) 2010-2023 RhodeCode GmbH | |||
|
3 | # | |||
|
4 | # This program is free software: you can redistribute it and/or modify | |||
|
5 | # it under the terms of the GNU Affero General Public License, version 3 | |||
|
6 | # (only), as published by the Free Software Foundation. | |||
|
7 | # | |||
|
8 | # This program is distributed in the hope that it will be useful, | |||
|
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
|
11 | # GNU General Public License for more details. | |||
|
12 | # | |||
|
13 | # You should have received a copy of the GNU Affero General Public License | |||
|
14 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |||
|
15 | # | |||
|
16 | # This program is dual-licensed. If you wish to learn more about the | |||
|
17 | # RhodeCode Enterprise Edition, including its added features, Support services, | |||
|
18 | # and proprietary license terms, please see https://rhodecode.com/licenses/ | |||
|
19 | ||||
|
20 | import pytest | |||
|
21 | ||||
|
22 | from rhodecode.lib.html_filters import sanitize_html | |||
|
23 | ||||
|
24 | ||||
|
@pytest.mark.parametrize(
    "src_html, expected_html",
    [
        # whitelisted markup passes through unchanged
        ('<div>ITEM</div>', '<div>ITEM</div>'),
        # HTML comments are kept
        ('<div>ITEM</div> <!-- comment here -->', '<div>ITEM</div> <!-- comment here -->'),
        # a CSS property that is not allowed is dropped, leaving an empty style attribute
        ('<div style="not-allowed:true">ITEM</div>', '<div style="">ITEM</div>'),
        # event-handler attributes are removed
        ('<div onload="ACTION">ITEM</div>', '<div>ITEM</div>'),
        # ...while an allowed CSS property on the same tag survives (normalized with a trailing ';')
        ('<a onload="ACTION" style="color:red">ITEM</a>', '<a style="color:red;">ITEM</a>'),
        # the explicit closing tag of a void element is dropped
        ('<img src="/file.png"></img>', '<img src="/file.png">'),
    ])
def test_html_sanitizer_options(src_html, expected_html):
    """Check that ``sanitize_html`` with default options yields the expected markup."""
    parsed_html = sanitize_html(src_html)
    assert parsed_html == expected_html
@@ -44,7 +44,6 b' from collections import OrderedDict' | |||||
44 | import pygments |
|
44 | import pygments | |
45 | import itertools |
|
45 | import itertools | |
46 | import fnmatch |
|
46 | import fnmatch | |
47 | import bleach |
|
|||
48 |
|
47 | |||
49 | from datetime import datetime |
|
48 | from datetime import datetime | |
50 | from functools import partial |
|
49 | from functools import partial | |
@@ -78,6 +77,7 b' from webhelpers2.number import format_by' | |||||
78 | from rhodecode.lib._vendor.webhelpers_backports import raw_select |
|
77 | from rhodecode.lib._vendor.webhelpers_backports import raw_select | |
79 |
|
78 | |||
80 | from rhodecode.lib.action_parser import action_parser |
|
79 | from rhodecode.lib.action_parser import action_parser | |
|
80 | from rhodecode.lib.html_filters import sanitize_html | |||
81 | from rhodecode.lib.pagination import Page, RepoPage, SqlPage |
|
81 | from rhodecode.lib.pagination import Page, RepoPage, SqlPage | |
82 | from rhodecode.lib import ext_json |
|
82 | from rhodecode.lib import ext_json | |
83 | from rhodecode.lib.ext_json import json |
|
83 | from rhodecode.lib.ext_json import json | |
@@ -1645,7 +1645,7 b' def _process_url_func(match_obj, repo_na' | |||||
1645 | 'id-repr': issue_id, |
|
1645 | 'id-repr': issue_id, | |
1646 | 'issue-prefix': entry['pref'], |
|
1646 | 'issue-prefix': entry['pref'], | |
1647 | 'serv': entry['url'], |
|
1647 | 'serv': entry['url'], | |
1648 |
'title': |
|
1648 | 'title': sanitize_html(desc, strip=True), | |
1649 | 'hovercard_url': hovercard_url |
|
1649 | 'hovercard_url': hovercard_url | |
1650 | } |
|
1650 | } | |
1651 |
|
1651 |
@@ -16,8 +16,47 b'' | |||||
16 | # RhodeCode Enterprise Edition, including its added features, Support services, |
|
16 | # RhodeCode Enterprise Edition, including its added features, Support services, | |
17 | # and proprietary license terms, please see https://rhodecode.com/licenses/ |
|
17 | # and proprietary license terms, please see https://rhodecode.com/licenses/ | |
18 |
|
18 | |||
|
19 | import functools | |||
|
20 | import logging | |||
|
21 | from .html_sanitizer_defs import markdown_attrs, markdown_tags, all_tags, all_styles | |||
|
22 | ||||
|
23 | ||||
|
24 | log = logging.getLogger(__name__) | |||
|
25 | ||||
|
26 | ||||
# base64 filter e.g ${ example | base64,n }
def base64(text):
    """
    Template filter: pass ``text`` through
    :func:`rhodecode.lib.str_utils.base64_to_str` and return its result.

    :param text: value handed over by the template expression
    """
    # resolved at call time rather than at module import
    from rhodecode.lib.str_utils import base64_to_str
    return base64_to_str(text)
23 |
|
31 | |||
|
32 | ||||
|
def sanitize_html(text, **kwargs):
    """
    Clean an HTML fragment with ``bleach.clean`` using the project whitelists.

    :param text: markup to sanitize
    :param markdown: (keyword only, default ``False``) when true, restrict
        tags to ``markdown_tags``; otherwise ``all_tags`` is used. The
        attribute whitelist is ``markdown_attrs`` in both modes.
    :param kwargs: any remaining keyword arguments are forwarded verbatim to
        ``bleach.clean`` (e.g. ``strip=True``). ``strip_comments`` defaults
        to ``False`` but may be overridden by the caller.
    :return: the sanitized markup, or a fixed placeholder string when the
        cleaner raised (the exception is logged).
    """
    # TODO: replace this with https://nh3.readthedocs.io/en/latest
    # bleach is abandoned and deprecated :/
    import bleach
    from bleach.css_sanitizer import CSSSanitizer

    css_sanitizer = CSSSanitizer(allowed_css_properties=all_styles)

    # 'markdown' is our own switch and must not leak into bleach.clean
    markdown = kwargs.pop('markdown', False)
    # comments are preserved by default; setdefault (instead of a hard-coded
    # keyword next to **kwargs) lets a caller override it without triggering
    # a "multiple values for keyword argument" TypeError
    kwargs.setdefault('strip_comments', False)

    allowed_tags = markdown_tags if markdown else all_tags

    cleaner = functools.partial(
        bleach.clean,
        tags=allowed_tags,
        attributes=markdown_attrs,
        css_sanitizer=css_sanitizer,
        **kwargs)

    try:
        return cleaner(text)
    except Exception:
        # deliberate best-effort: never let a sanitizer failure propagate
        # into rendering, but keep the traceback in the logs
        log.exception('Failed to sanitize html')
        return 'TEXT CANNOT BE PARSED USING HTML SANITIZE'
@@ -62,7 +62,8 b' markdown_tags = [' | |||||
62 | "a", |
|
62 | "a", | |
63 | "input", |
|
63 | "input", | |
64 | "details", |
|
64 | "details", | |
65 | "summary" |
|
65 | "summary", | |
|
66 | "div" | |||
66 | ] |
|
67 | ] | |
67 |
|
68 | |||
68 | markdown_attrs = { |
|
69 | markdown_attrs = { |
@@ -28,7 +28,6 b' import os' | |||||
28 | import lxml |
|
28 | import lxml | |
29 | import logging |
|
29 | import logging | |
30 | import urllib.parse |
|
30 | import urllib.parse | |
31 | import bleach |
|
|||
32 | import pycmarkgfm |
|
31 | import pycmarkgfm | |
33 |
|
32 | |||
34 | from mako.lookup import TemplateLookup |
|
33 | from mako.lookup import TemplateLookup | |
@@ -40,7 +39,7 b' from docutils import writers' | |||||
40 | from docutils.writers import html4css1 |
|
39 | from docutils.writers import html4css1 | |
41 | import markdown |
|
40 | import markdown | |
42 |
|
41 | |||
43 |
from rhodecode.lib.utils2 import safe_str, |
|
42 | from rhodecode.lib.utils2 import safe_str, MENTIONS_REGEX | |
44 |
|
43 | |||
45 | log = logging.getLogger(__name__) |
|
44 | log = logging.getLogger(__name__) | |
46 |
|
45 | |||
@@ -271,17 +270,8 b' class MarkupRenderer(object):' | |||||
271 |
|
270 | |||
272 | @classmethod |
|
271 | @classmethod | |
273 | def sanitize_html(cls, text): |
|
272 | def sanitize_html(cls, text): | |
274 | # TODO: replace this with https://nh3.readthedocs.io/en/latest |
|
273 | from .html_filters import sanitize_html | |
275 | # bleach is abandoned and deprecated :/ |
|
274 | return sanitize_html(text, markdown=True) | |
276 |
|
||||
277 | from .bleach_whitelist import markdown_attrs, markdown_tags |
|
|||
278 | allowed_tags = markdown_tags |
|
|||
279 | allowed_attrs = markdown_attrs |
|
|||
280 |
|
||||
281 | try: |
|
|||
282 | return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs) |
|
|||
283 | except Exception: |
|
|||
284 | return 'TEXT CANNOT BE PARSED USING SANITIZE' |
|
|||
285 |
|
275 | |||
286 | @classmethod |
|
276 | @classmethod | |
287 | def renderer_from_filename(cls, filename, exclude): |
|
277 | def renderer_from_filename(cls, filename, exclude): |
@@ -21,13 +21,13 b' import re' | |||||
21 | import logging |
|
21 | import logging | |
22 | import time |
|
22 | import time | |
23 | import functools |
|
23 | import functools | |
24 | import bleach |
|
|||
25 | from collections import namedtuple |
|
24 | from collections import namedtuple | |
26 |
|
25 | |||
27 | from pyramid.threadlocal import get_current_request |
|
26 | from pyramid.threadlocal import get_current_request | |
28 |
|
27 | |||
29 | from rhodecode.lib import rc_cache |
|
28 | from rhodecode.lib import rc_cache | |
30 | from rhodecode.lib.hash_utils import sha1_safe |
|
29 | from rhodecode.lib.hash_utils import sha1_safe | |
|
30 | from rhodecode.lib.html_filters import sanitize_html | |||
31 | from rhodecode.lib.utils2 import ( |
|
31 | from rhodecode.lib.utils2 import ( | |
32 | Optional, AttributeDict, safe_str, remove_prefix, str2bool) |
|
32 | Optional, AttributeDict, safe_str, remove_prefix, str2bool) | |
33 | from rhodecode.lib.vcs.backends import base |
|
33 | from rhodecode.lib.vcs.backends import base | |
@@ -376,7 +376,7 b' class IssueTrackerSettingsModel(object):' | |||||
376 |
|
376 | |||
377 | def url_cleaner(input_str): |
|
377 | def url_cleaner(input_str): | |
378 | input_str = input_str.replace('"', '').replace("'", '') |
|
378 | input_str = input_str.replace('"', '').replace("'", '') | |
379 |
input_str = |
|
379 | input_str = sanitize_html(input_str, strip=True) | |
380 | return input_str |
|
380 | return input_str | |
381 |
|
381 | |||
382 | # populate |
|
382 | # populate | |
@@ -394,7 +394,7 b' class IssueTrackerSettingsModel(object):' | |||||
394 | 'pat_compiled': pat_compiled, |
|
394 | 'pat_compiled': pat_compiled, | |
395 | 'url': url_cleaner( |
|
395 | 'url': url_cleaner( | |
396 | qs.get(self._get_keyname('url', uid, 'rhodecode_')) or ''), |
|
396 | qs.get(self._get_keyname('url', uid, 'rhodecode_')) or ''), | |
397 |
'pref': |
|
397 | 'pref': sanitize_html( | |
398 | qs.get(self._get_keyname('pref', uid, 'rhodecode_')) or ''), |
|
398 | qs.get(self._get_keyname('pref', uid, 'rhodecode_')) or ''), | |
399 | 'desc': qs.get( |
|
399 | 'desc': qs.get( | |
400 | self._get_keyname('desc', uid, 'rhodecode_')), |
|
400 | self._get_keyname('desc', uid, 'rhodecode_')), |
General Comments 0
You need to be logged in to leave comments.
Login now