rhodecode-enterprise-ce Commit - r5098:34f9ec38

html_sanitizer: abstracted bleach into own function/code for later replacement...

super-admin -

r5098:34f9ec38 default

parent child

rhodecode/tests/lib/test_html_sanitizer.py

0 created 644 +38 0

			@@ -0,0 +1,38 b''
		1
		2	# Copyright (C) 2010-2023 RhodeCode GmbH
		3	#
		4	# This program is free software: you can redistribute it and/or modify
		5	# it under the terms of the GNU Affero General Public License, version 3
		6	# (only), as published by the Free Software Foundation.
		7	#
		8	# This program is distributed in the hope that it will be useful,
		9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
		10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		11	# GNU General Public License for more details.
		12	#
		13	# You should have received a copy of the GNU Affero General Public License
		14	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		15	#
		16	# This program is dual-licensed. If you wish to learn more about the
		17	# RhodeCode Enterprise Edition, including its added features, Support services,
		18	# and proprietary license terms, please see https://rhodecode.com/licenses/
		19
		20	import pytest
		21
		22	from rhodecode.lib.html_filters import sanitize_html
		23
		24
		25	@pytest.mark.parametrize(
		26	"src_html, expected_html",
		27	[
		28	('<div>ITEM</div>', '<div>ITEM</div>'),
		29	('<div>ITEM</div> <!-- comment here -->', '<div>ITEM</div> <!-- comment here -->'),
		30	('<div style="not-allowed:true">ITEM</div>', '<div style="">ITEM</div>'),
		31	('<div onload="ACTION">ITEM</div>', '<div>ITEM</div>'),
		32	('<a onload="ACTION" style="color:red">ITEM</a>', '<a style="color:red;">ITEM</a>'),
		33	('<img src="/file.png"></img>', '<img src="/file.png">'),
		34	('<img src="/file.png"></img>', '<img src="/file.png">'),
		35	])
		36	def test_html_sanitizer_options(src_html, expected_html):
		37	parsed_html = sanitize_html(src_html)
		38	assert parsed_html == expected_html

rhodecode/lib/helpers.py

0 +2 -2

              import pygments
              import itertools
              import fnmatch
-             import bleach
              from datetime import datetime
              from functools import partial
              from rhodecode.lib._vendor.webhelpers_backports import raw_select
              from rhodecode.lib.action_parser import action_parser
+             from rhodecode.lib.html_filters import sanitize_html
              from rhodecode.lib.pagination import Page, RepoPage, SqlPage
              from rhodecode.lib import ext_json
              from rhodecode.lib.ext_json import json
                      'id-repr': issue_id,
                      'issue-prefix': entry['pref'],
                      'serv': entry['url'],
-                     'title': bleach.clean(desc, strip=True),
+                     'title': sanitize_html(desc, strip=True),
                      'hovercard_url': hovercard_url
                  }

rhodecode/lib/html_filters.py

0 +39 0

		@@ -16,8 +16,47 b''
16	16	# RhodeCode Enterprise Edition, including its added features, Support services,
17	17	# and proprietary license terms, please see https://rhodecode.com/licenses/
18	18
	19	import functools
	20	import logging
	21	from .html_sanitizer_defs import markdown_attrs, markdown_tags, all_tags, all_styles
	22
	23
	24	log = logging.getLogger(__name__)
	25
	26
# base64 filter e.g ${ example | base64,n }
def base64(text):
    """
    Template filter that passes ``text`` through ``base64_to_str``.

    :param text: value handed to the filter by the template.
    :return: whatever ``rhodecode.lib.str_utils.base64_to_str`` returns for
        ``text`` (presumably the base64-decoded string - confirm against
        that helper).
    """
    # imported lazily inside the function, same as before
    from rhodecode.lib.str_utils import base64_to_str

    converted = base64_to_str(text)
    return converted
23	31
	32
	33	def sanitize_html(text, **kwargs):
	34	# TODO: replace this with https://nh3.readthedocs.io/en/latest
	35	# bleach is abandoned and deprecated :/
	36	import bleach
	37	from bleach.css_sanitizer import CSSSanitizer
	38
	39	css_sanitizer = CSSSanitizer(allowed_css_properties=all_styles)
	40
	41	markdown = kwargs.pop('markdown', False)
	42
	43	allowed_attrs = markdown_attrs
	44
	45	cleaner = functools.partial(bleach.clean,
	46	tags=all_tags,
	47	attributes=allowed_attrs,
	48	css_sanitizer=css_sanitizer,
	49	strip_comments=False, **kwargs)
	50
	51	if markdown:
	52	cleaner = functools.partial(bleach.clean,
	53	tags=markdown_tags,
	54	attributes=markdown_attrs,
	55	css_sanitizer=css_sanitizer,
	56	strip_comments=False, **kwargs)
	57
	58	try:
	59	return cleaner(text)
	60	except Exception:
	61	log.exception('Failed to sanitize html')
	62	return 'TEXT CANNOT BE PARSED USING HTML SANITIZE'

rhodecode/lib/html_sanitizer_defs.py ~~rhodecode/lib/bleach_whitelist.py~~

0 renamed +2 -1

                  "a",
                  "input",
                  "details",
-                 "summary"
+                 "summary",
+                 "div"
              ]
              markdown_attrs = {

rhodecode/lib/markup_renderer.py

0 +3 -13

              import lxml
              import logging
              import urllib.parse
-             import bleach
              import pycmarkgfm
              from mako.lookup import TemplateLookup
              from docutils.writers import html4css1
              import markdown
-             from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX
+             from rhodecode.lib.utils2 import safe_str, MENTIONS_REGEX
              log = logging.getLogger(__name__)
                  @classmethod
                  def sanitize_html(cls, text):
-                     # TODO: replace this with https://nh3.readthedocs.io/en/latest
-                     # bleach is abandoned and deprecated :/
-                     from .bleach_whitelist import markdown_attrs, markdown_tags
-                     allowed_tags = markdown_tags
-                     allowed_attrs = markdown_attrs
-                     try:
-                         return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
-                     except Exception:
-                         return 'TEXT CANNOT BE PARSED USING SANITIZE'
+                     from .html_filters import sanitize_html
+                     return sanitize_html(text, markdown=True)
                  @classmethod
                  def renderer_from_filename(cls, filename, exclude):

rhodecode/model/settings.py

0 +3 -3

              import logging
              import time
              import functools
-             import bleach
              from collections import namedtuple
              from pyramid.threadlocal import get_current_request
              from rhodecode.lib import rc_cache
              from rhodecode.lib.hash_utils import sha1_safe
+             from rhodecode.lib.html_filters import sanitize_html
              from rhodecode.lib.utils2 import (
                  Optional, AttributeDict, safe_str, remove_prefix, str2bool)
              from rhodecode.lib.vcs.backends import base
                      def url_cleaner(input_str):
                          input_str = input_str.replace('"', '').replace("'", '')
-                         input_str = bleach.clean(input_str, strip=True)
+                         input_str = sanitize_html(input_str, strip=True)
                          return input_str
                      # populate
                              'pat_compiled': pat_compiled,
                              'url': url_cleaner(
                                  qs.get(self._get_keyname('url', uid, 'rhodecode_')) or ''),
-                             'pref': bleach.clean(
+                             'pref': sanitize_html(
                                  qs.get(self._get_keyname('pref', uid, 'rhodecode_')) or ''),
                              'desc': qs.get(
                                  self._get_keyname('desc', uid, 'rhodecode_')),

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages