##// END OF EJS Templates
html_sanitizer: abstracted bleach into own function/code for later replacement...
super-admin -
r5098:34f9ec38 default
parent child Browse files
Show More
@@ -0,0 +1,38 b''
1
2 # Copyright (C) 2010-2023 RhodeCode GmbH
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License, version 3
6 # (only), as published by the Free Software Foundation.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 #
16 # This program is dual-licensed. If you wish to learn more about the
17 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # and proprietary license terms, please see https://rhodecode.com/licenses/
19
20 import pytest
21
22 from rhodecode.lib.html_filters import sanitize_html
23
24
@pytest.mark.parametrize(
    "src_html, expected_html",
    [
        # allowed markup passes through unchanged
        ('<div>ITEM</div>', '<div>ITEM</div>'),
        # HTML comments are kept in the output
        ('<div>ITEM</div> <!-- comment here -->', '<div>ITEM</div> <!-- comment here -->'),
        # a CSS property that is not allowed is dropped, leaving an empty style
        ('<div style="not-allowed:true">ITEM</div>', '<div style="">ITEM</div>'),
        # event-handler attributes are removed
        ('<div onload="ACTION">ITEM</div>', '<div>ITEM</div>'),
        # an allowed style is kept (serialized with a trailing semicolon)
        ('<a onload="ACTION" style="color:red">ITEM</a>', '<a style="color:red;">ITEM</a>'),
        # the explicit closing tag of an <img> element is not emitted
        ('<img src="/file.png"></img>', '<img src="/file.png">'),
    ])
def test_html_sanitizer_options(src_html, expected_html):
    """Check that ``sanitize_html`` with default options yields the expected markup."""
    parsed_html = sanitize_html(src_html)
    assert parsed_html == expected_html
@@ -44,7 +44,6 b' from collections import OrderedDict'
44 import pygments
44 import pygments
45 import itertools
45 import itertools
46 import fnmatch
46 import fnmatch
47 import bleach
48
47
49 from datetime import datetime
48 from datetime import datetime
50 from functools import partial
49 from functools import partial
@@ -78,6 +77,7 b' from webhelpers2.number import format_by'
78 from rhodecode.lib._vendor.webhelpers_backports import raw_select
77 from rhodecode.lib._vendor.webhelpers_backports import raw_select
79
78
80 from rhodecode.lib.action_parser import action_parser
79 from rhodecode.lib.action_parser import action_parser
80 from rhodecode.lib.html_filters import sanitize_html
81 from rhodecode.lib.pagination import Page, RepoPage, SqlPage
81 from rhodecode.lib.pagination import Page, RepoPage, SqlPage
82 from rhodecode.lib import ext_json
82 from rhodecode.lib import ext_json
83 from rhodecode.lib.ext_json import json
83 from rhodecode.lib.ext_json import json
@@ -1645,7 +1645,7 b' def _process_url_func(match_obj, repo_na'
1645 'id-repr': issue_id,
1645 'id-repr': issue_id,
1646 'issue-prefix': entry['pref'],
1646 'issue-prefix': entry['pref'],
1647 'serv': entry['url'],
1647 'serv': entry['url'],
1648 'title': bleach.clean(desc, strip=True),
1648 'title': sanitize_html(desc, strip=True),
1649 'hovercard_url': hovercard_url
1649 'hovercard_url': hovercard_url
1650 }
1650 }
1651
1651
@@ -16,8 +16,47 b''
16 # RhodeCode Enterprise Edition, including its added features, Support services,
16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18
18
19 import functools
20 import logging
21 from .html_sanitizer_defs import markdown_attrs, markdown_tags, all_tags, all_styles
22
23
24 log = logging.getLogger(__name__)
25
26
19 # base64 filter e.g ${ example | base64,n }
27 # base64 filter e.g ${ example | base64,n }
def base64(text):
    """
    Template filter that hands ``text`` to
    ``rhodecode.lib.str_utils.base64_to_str`` and returns its result.

    NOTE(review): presumably this decodes base64 input into a str; confirm
    against ``base64_to_str`` itself.
    """
    # imported at call time, so the dependency is only loaded when the
    # filter is actually used
    from rhodecode.lib import str_utils

    return str_utils.base64_to_str(text)
23
31
32
def sanitize_html(text, **kwargs):
    """
    Clean ``text`` with ``bleach.clean`` using the project's tag, attribute
    and CSS allow-lists.

    :param text: HTML source to sanitize.
    :param kwargs: ``markdown`` (default ``False``) is consumed here: when
        true, the ``markdown_tags`` allow-list is used instead of
        ``all_tags``. Every other keyword (e.g. ``strip=True``) is forwarded
        unchanged to ``bleach.clean``.
    :return: the sanitized markup, or a fixed placeholder string when the
        cleaner raises (the exception is logged).
    """
    # TODO: replace this with https://nh3.readthedocs.io/en/latest
    # bleach is abandoned and deprecated :/
    import bleach
    from bleach.css_sanitizer import CSSSanitizer

    markdown = kwargs.pop('markdown', False)

    # only the tag allow-list differs between the two modes; attributes,
    # CSS rules and comment handling are shared
    allowed_tags = markdown_tags if markdown else all_tags

    # built outside the ``try`` on purpose: conflicting keyword arguments
    # are a caller error and should surface, not be masked by the fallback
    cleaner = functools.partial(
        bleach.clean,
        tags=allowed_tags,
        attributes=markdown_attrs,
        css_sanitizer=CSSSanitizer(allowed_css_properties=all_styles),
        strip_comments=False,
        **kwargs)

    try:
        return cleaner(text)
    except Exception:
        log.exception('Failed to sanitize html')
        return 'TEXT CANNOT BE PARSED USING HTML SANITIZE'
@@ -62,7 +62,8 b' markdown_tags = ['
62 "a",
62 "a",
63 "input",
63 "input",
64 "details",
64 "details",
65 "summary"
65 "summary",
66 "div"
66 ]
67 ]
67
68
68 markdown_attrs = {
69 markdown_attrs = {
@@ -28,7 +28,6 b' import os'
28 import lxml
28 import lxml
29 import logging
29 import logging
30 import urllib.parse
30 import urllib.parse
31 import bleach
32 import pycmarkgfm
31 import pycmarkgfm
33
32
34 from mako.lookup import TemplateLookup
33 from mako.lookup import TemplateLookup
@@ -40,7 +39,7 b' from docutils import writers'
40 from docutils.writers import html4css1
39 from docutils.writers import html4css1
41 import markdown
40 import markdown
42
41
43 from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX
42 from rhodecode.lib.utils2 import safe_str, MENTIONS_REGEX
44
43
45 log = logging.getLogger(__name__)
44 log = logging.getLogger(__name__)
46
45
@@ -271,17 +270,8 b' class MarkupRenderer(object):'
271
270
@classmethod
def sanitize_html(cls, text):
    """
    Sanitize ``text`` by delegating to ``html_filters.sanitize_html``
    with ``markdown=True``.
    """
    from . import html_filters

    return html_filters.sanitize_html(text, markdown=True)
275
286 @classmethod
276 @classmethod
287 def renderer_from_filename(cls, filename, exclude):
277 def renderer_from_filename(cls, filename, exclude):
@@ -21,13 +21,13 b' import re'
21 import logging
21 import logging
22 import time
22 import time
23 import functools
23 import functools
24 import bleach
25 from collections import namedtuple
24 from collections import namedtuple
26
25
27 from pyramid.threadlocal import get_current_request
26 from pyramid.threadlocal import get_current_request
28
27
29 from rhodecode.lib import rc_cache
28 from rhodecode.lib import rc_cache
30 from rhodecode.lib.hash_utils import sha1_safe
29 from rhodecode.lib.hash_utils import sha1_safe
30 from rhodecode.lib.html_filters import sanitize_html
31 from rhodecode.lib.utils2 import (
31 from rhodecode.lib.utils2 import (
32 Optional, AttributeDict, safe_str, remove_prefix, str2bool)
32 Optional, AttributeDict, safe_str, remove_prefix, str2bool)
33 from rhodecode.lib.vcs.backends import base
33 from rhodecode.lib.vcs.backends import base
@@ -376,7 +376,7 b' class IssueTrackerSettingsModel(object):'
376
376
def url_cleaner(input_str):
    # drop double and single quotes first, then pass the remainder through
    # the shared sanitizer (``strip=True`` is forwarded to it)
    unquoted = input_str.replace('"', '').replace("'", '')
    return sanitize_html(unquoted, strip=True)
381
381
382 # populate
382 # populate
@@ -394,7 +394,7 b' class IssueTrackerSettingsModel(object):'
394 'pat_compiled': pat_compiled,
394 'pat_compiled': pat_compiled,
395 'url': url_cleaner(
395 'url': url_cleaner(
396 qs.get(self._get_keyname('url', uid, 'rhodecode_')) or ''),
396 qs.get(self._get_keyname('url', uid, 'rhodecode_')) or ''),
397 'pref': bleach.clean(
397 'pref': sanitize_html(
398 qs.get(self._get_keyname('pref', uid, 'rhodecode_')) or ''),
398 qs.get(self._get_keyname('pref', uid, 'rhodecode_')) or ''),
399 'desc': qs.get(
399 'desc': qs.get(
400 self._get_keyname('desc', uid, 'rhodecode_')),
400 self._get_keyname('desc', uid, 'rhodecode_')),
General Comments 0
You need to be logged in to leave comments. Login now