|
|
# Copyright (C) 2020-2024 RhodeCode GmbH
|
|
|
#
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
# it under the terms of the GNU Affero General Public License, version 3
|
|
|
# (only), as published by the Free Software Foundation.
|
|
|
#
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
# GNU General Public License for more details.
|
|
|
#
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
#
|
|
|
# This program is dual-licensed. If you wish to learn more about the
|
|
|
# RhodeCode Enterprise Edition, including its added features, Support services,
|
|
|
# and proprietary license terms, please see https://rhodecode.com/licenses/
|
|
|
|
|
|
import functools
|
|
|
import logging
|
|
|
from .html_sanitizer_defs import markdown_attrs, markdown_tags, all_tags, all_styles
|
|
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
# base64 filter, e.g. ${ example | base64,n }
|
|
|
def base64(text):
    """
    Template filter that hands ``text`` to
    ``rhodecode.lib.str_utils.base64_to_str`` and returns its result as-is.

    :param text: value passed through the filter.
    :return: whatever ``base64_to_str`` returns for ``text``.
    """
    # Imported inside the function, as in the original implementation.
    from rhodecode.lib import str_utils

    return str_utils.base64_to_str(text)
|
|
|
|
|
|
|
|
|
def sanitize_html(text, **kwargs):
    """
    Run ``text`` through ``bleach.clean`` and return the cleaned result.

    :param text: markup to sanitize.
    :param kwargs: extra keyword arguments forwarded to ``bleach.clean``.
        The ``markdown`` key (default ``False``) is consumed here and is not
        forwarded; when truthy, ``markdown_tags`` is used as the tag list
        instead of ``all_tags``.
    :return: the output of ``bleach.clean``, or a fixed placeholder message
        when cleaning raises any exception (the error is logged).
    """
    # TODO: replace this with https://nh3.readthedocs.io/en/latest
    # bleach is abandoned and deprecated :/
    import bleach
    from bleach.css_sanitizer import CSSSanitizer

    style_filter = CSSSanitizer(allowed_css_properties=all_styles)

    # `markdown` is our own switch, so it must be removed before the remaining
    # kwargs are passed on to bleach.
    use_markdown = kwargs.pop('markdown', False)

    # Only the tag list depends on the mode; the attribute list is
    # `markdown_attrs` in both cases.
    permitted_tags = markdown_tags if use_markdown else all_tags

    cleaner = functools.partial(
        bleach.clean,
        tags=permitted_tags,
        attributes=markdown_attrs,
        css_sanitizer=style_filter,
        strip_comments=False,
        **kwargs
    )

    try:
        cleaned = cleaner(text)
    except Exception:
        # Any cleaning failure is logged and replaced by a placeholder string
        # instead of being propagated to the caller.
        log.exception('Failed to sanitize html')
        return 'TEXT CANNOT BE PARSED USING HTML SANITIZE'
    return cleaned
|
|
|
|