##// END OF EJS Templates
html_sanitizer: abstracted bleach into its own function/code for later replacement...
super-admin -
r5098:34f9ec38 default
parent child Browse files
Show More
@@ -0,0 +1,38 b''
1
2 # Copyright (C) 2010-2023 RhodeCode GmbH
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License, version 3
6 # (only), as published by the Free Software Foundation.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 #
16 # This program is dual-licensed. If you wish to learn more about the
17 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # and proprietary license terms, please see https://rhodecode.com/licenses/
19
20 import pytest
21
22 from rhodecode.lib.html_filters import sanitize_html
23
24
@pytest.mark.parametrize(
    "src_html, expected_html",
    [
        # plain allowed markup is expected back unchanged
        ('<div>ITEM</div>', '<div>ITEM</div>'),
        # HTML comments are expected to be preserved
        ('<div>ITEM</div> <!-- comment here -->', '<div>ITEM</div> <!-- comment here -->'),
        # a disallowed CSS property is dropped, leaving an empty style attribute
        ('<div style="not-allowed:true">ITEM</div>', '<div style="">ITEM</div>'),
        # event-handler attributes are removed
        ('<div onload="ACTION">ITEM</div>', '<div>ITEM</div>'),
        # allowed CSS is kept (normalised with a trailing ';'), the handler is removed
        ('<a onload="ACTION" style="color:red">ITEM</a>', '<a style="color:red;">ITEM</a>'),
        # the closing tag of a void element is not emitted
        ('<img src="/file.png"></img>', '<img src="/file.png">'),
    ])
def test_html_sanitizer_options(src_html, expected_html):
    """Check that ``sanitize_html`` with default options turns each input into the expected markup."""
    parsed_html = sanitize_html(src_html)
    assert parsed_html == expected_html
@@ -44,7 +44,6 b' from collections import OrderedDict'
44 44 import pygments
45 45 import itertools
46 46 import fnmatch
47 import bleach
48 47
49 48 from datetime import datetime
50 49 from functools import partial
@@ -78,6 +77,7 b' from webhelpers2.number import format_by'
78 77 from rhodecode.lib._vendor.webhelpers_backports import raw_select
79 78
80 79 from rhodecode.lib.action_parser import action_parser
80 from rhodecode.lib.html_filters import sanitize_html
81 81 from rhodecode.lib.pagination import Page, RepoPage, SqlPage
82 82 from rhodecode.lib import ext_json
83 83 from rhodecode.lib.ext_json import json
@@ -1645,7 +1645,7 b' def _process_url_func(match_obj, repo_na'
1645 1645 'id-repr': issue_id,
1646 1646 'issue-prefix': entry['pref'],
1647 1647 'serv': entry['url'],
1648 'title': bleach.clean(desc, strip=True),
1648 'title': sanitize_html(desc, strip=True),
1649 1649 'hovercard_url': hovercard_url
1650 1650 }
1651 1651
@@ -16,8 +16,47 b''
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 import functools
20 import logging
21 from .html_sanitizer_defs import markdown_attrs, markdown_tags, all_tags, all_styles
22
23
24 log = logging.getLogger(__name__)
25
26
19 27 # base64 filter e.g ${ example | base64,n }
def base64(text):
    """
    Template filter: hand ``text`` to ``str_utils.base64_to_str`` and return its result.

    NOTE(review): presumably yields the base64 representation as ``str`` - confirm
    against ``rhodecode.lib.str_utils``.
    """
    # imported lazily, inside the function, as in the original implementation
    from rhodecode.lib import str_utils
    return str_utils.base64_to_str(text)
23 31
32
def sanitize_html(text, **kwargs):
    """
    Clean ``text`` with ``bleach.clean`` and return the sanitized markup.

    The optional ``markdown=True`` keyword switches the tag whitelist from
    ``all_tags`` to ``markdown_tags``; the attribute whitelist is
    ``markdown_attrs`` in both modes. Inline CSS is filtered against
    ``all_styles`` and HTML comments are kept (``strip_comments=False``).
    Any other keyword argument (callers pass e.g. ``strip=True``) is forwarded
    to ``bleach.clean`` unchanged.

    If cleaning raises, the exception is logged and a fixed placeholder string
    is returned instead of propagating the error.
    """
    # TODO: replace this with https://nh3.readthedocs.io/en/latest
    # bleach is abandoned and deprecated :/
    import bleach
    from bleach.css_sanitizer import CSSSanitizer

    css_filter = CSSSanitizer(allowed_css_properties=all_styles)

    # 'markdown' is our own switch and must not leak into bleach.clean()
    use_markdown_tags = kwargs.pop('markdown', False)
    if use_markdown_tags:
        tag_whitelist = markdown_tags
    else:
        tag_whitelist = all_tags

    # merged outside of the try block: a caller keyword that collides with one
    # of the fixed options raises here rather than yielding the placeholder
    clean_options = dict(
        tags=tag_whitelist,
        attributes=markdown_attrs,
        css_sanitizer=css_filter,
        strip_comments=False,
        **kwargs)

    try:
        return bleach.clean(text, **clean_options)
    except Exception:
        log.exception('Failed to sanitize html')
        return 'TEXT CANNOT BE PARSED USING HTML SANITIZE'
@@ -62,7 +62,8 b' markdown_tags = ['
62 62 "a",
63 63 "input",
64 64 "details",
65 "summary"
65 "summary",
66 "div"
66 67 ]
67 68
68 69 markdown_attrs = {
@@ -28,7 +28,6 b' import os'
28 28 import lxml
29 29 import logging
30 30 import urllib.parse
31 import bleach
32 31 import pycmarkgfm
33 32
34 33 from mako.lookup import TemplateLookup
@@ -40,7 +39,7 b' from docutils import writers'
40 39 from docutils.writers import html4css1
41 40 import markdown
42 41
43 from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX
42 from rhodecode.lib.utils2 import safe_str, MENTIONS_REGEX
44 43
45 44 log = logging.getLogger(__name__)
46 45
@@ -271,17 +270,8 b' class MarkupRenderer(object):'
271 270
272 271 @classmethod
273 272 def sanitize_html(cls, text):
274 # TODO: replace this with https://nh3.readthedocs.io/en/latest
275 # bleach is abandoned and deprecated :/
276
277 from .bleach_whitelist import markdown_attrs, markdown_tags
278 allowed_tags = markdown_tags
279 allowed_attrs = markdown_attrs
280
281 try:
282 return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
283 except Exception:
284 return 'TEXT CANNOT BE PARSED USING SANITIZE'
273 from .html_filters import sanitize_html
274 return sanitize_html(text, markdown=True)
285 275
286 276 @classmethod
287 277 def renderer_from_filename(cls, filename, exclude):
@@ -21,13 +21,13 b' import re'
21 21 import logging
22 22 import time
23 23 import functools
24 import bleach
25 24 from collections import namedtuple
26 25
27 26 from pyramid.threadlocal import get_current_request
28 27
29 28 from rhodecode.lib import rc_cache
30 29 from rhodecode.lib.hash_utils import sha1_safe
30 from rhodecode.lib.html_filters import sanitize_html
31 31 from rhodecode.lib.utils2 import (
32 32 Optional, AttributeDict, safe_str, remove_prefix, str2bool)
33 33 from rhodecode.lib.vcs.backends import base
@@ -376,7 +376,7 b' class IssueTrackerSettingsModel(object):'
376 376
377 377 def url_cleaner(input_str):
378 378 input_str = input_str.replace('"', '').replace("'", '')
379 input_str = bleach.clean(input_str, strip=True)
379 input_str = sanitize_html(input_str, strip=True)
380 380 return input_str
381 381
382 382 # populate
@@ -394,7 +394,7 b' class IssueTrackerSettingsModel(object):'
394 394 'pat_compiled': pat_compiled,
395 395 'url': url_cleaner(
396 396 qs.get(self._get_keyname('url', uid, 'rhodecode_')) or ''),
397 'pref': bleach.clean(
397 'pref': sanitize_html(
398 398 qs.get(self._get_keyname('pref', uid, 'rhodecode_')) or ''),
399 399 'desc': qs.get(
400 400 self._get_keyname('desc', uid, 'rhodecode_')),
General Comments 0
You need to be logged in to leave comments. Login now