# Copyright (C) 2020-2024 RhodeCode GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/

import functools
import logging

from .html_sanitizer_defs import markdown_attrs, markdown_tags, all_tags, all_styles

log = logging.getLogger(__name__)


def base64(text):
    """
    Template filter, e.g. ``${ example | base64,n }``.

    Hands ``text`` to ``rhodecode.lib.str_utils.base64_to_str`` and returns
    whatever that helper produces.
    """
    # imported lazily, at call time, exactly as the filter always did
    from rhodecode.lib.str_utils import base64_to_str as _base64_to_str

    return _base64_to_str(text)


def sanitize_html(text, **kwargs):
    """
    Run ``text`` through ``bleach.clean`` with the project whitelists.

    :param text: HTML markup to clean.
    :param kwargs: extra keyword arguments forwarded to ``bleach.clean``.
        The special key ``markdown`` (default ``False``) is consumed here and
        not forwarded: when truthy the ``markdown_tags`` whitelist is used,
        otherwise ``all_tags``.
    :return: the cleaned markup, or a fixed placeholder message when the
        cleaner raises.
    """
    # TODO: replace this with https://nh3.readthedocs.io/en/latest
    # bleach is abandoned and deprecated :/
    import bleach
    from bleach.css_sanitizer import CSSSanitizer

    style_filter = CSSSanitizer(allowed_css_properties=all_styles)
    use_markdown = kwargs.pop('markdown', False)

    # Only the tag whitelist depends on the mode; the attribute whitelist is
    # `markdown_attrs` in both cases.
    permitted_tags = markdown_tags if use_markdown else all_tags

    # Built outside the try block on purpose: a keyword clash between the
    # caller's kwargs and the fixed arguments must still surface as an error.
    run_cleaner = functools.partial(
        bleach.clean,
        tags=permitted_tags,
        attributes=markdown_attrs,
        css_sanitizer=style_filter,
        strip_comments=False,
        **kwargs
    )

    try:
        cleaned = run_cleaner(text)
    except Exception:
        log.exception('Failed to sanitize html')
        return 'TEXT CANNOT BE PARSED USING HTML SANITIZE'
    else:
        return cleaned