# HG changeset patch # User Serhii Ilin # Date 2023-12-29 12:11:29 # Node ID 1ecdda6474c4d3d7ef8cf11521dfab743d1d874f # Parent 711a48785cddeb9f4c8e7d7eb7f62f785ecb67ff fix(jupiter): sanitizing rendering (HTML) of jupyter notebooks, upgraded bleach version. Fixes RCCE-15, RCCE-14 diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -121,7 +121,7 @@ mysqlclient==2.1.1 nbconvert==7.7.3 beautifulsoup4==4.11.2 soupsieve==2.4 - bleach==6.0.0 + bleach==6.1.0 six==1.16.0 webencodings==0.5.1 defusedxml==0.7.1 diff --git a/rhodecode/lib/markup_renderer.py b/rhodecode/lib/markup_renderer.py --- a/rhodecode/lib/markup_renderer.py +++ b/rhodecode/lib/markup_renderer.py @@ -424,11 +424,13 @@ class MarkupRenderer(object): @classmethod def jupyter(cls, source, safe=True): from rhodecode.lib import helpers + from .html_sanitizer_defs import markdown_attrs, all_tags, all_styles from traitlets import default, config import nbformat from nbconvert import HTMLExporter from nbconvert.preprocessors import Preprocessor + from nbconvert.preprocessors.sanitize import SanitizeHTML class CustomHTMLExporter(HTMLExporter): @@ -439,24 +441,20 @@ class MarkupRenderer(object): class Sandbox(Preprocessor): - def preprocess(self, nb, resources): + def preprocess_cell(self, cell, resources, cell_index): + if not safe: + return cell, resources sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)' - for cell in nb['cells']: - if not safe: - continue + if cell.cell_type == "markdown": + cell.source = cls.sanitize_html(cell.source) + return cell, resources - if 'outputs' in cell: - for cell_output in cell['outputs']: - if 'data' in cell_output: - if 'application/javascript' in cell_output['data']: - cell_output['data']['text/plain'] = sandbox_text - cell_output['data'].pop('application/javascript', None) - - if 'source' in cell and cell['cell_type'] == 'markdown': - # sanitize similar like in markdown - cell['source'] = 
cls.sanitize_html(cell['source']) - - return nb, resources + for cell_output in cell.get('outputs', []): + if 'data' in cell_output: + if 'application/javascript' in cell_output['data']: + cell_output['data']['text/plain'] = sandbox_text + cell_output['data'].pop('application/javascript', None) + return cell, resources def _sanitize_resources(input_resources): """ @@ -475,8 +473,29 @@ class MarkupRenderer(object): def as_html(notebook): conf = config.Config() - conf.CustomHTMLExporter.default_preprocessors = [Sandbox] + # TODO: Keep an eye on the order of preprocessors + conf.CustomHTMLExporter.default_preprocessors = [Sandbox, SanitizeHTML] conf.Sandbox.enabled = True + conf.SanitizeHTML.enabled = True + conf.SanitizeHTML.attributes = markdown_attrs + conf.SanitizeHTML.tags = all_tags + conf.SanitizeHTML.styles = all_styles + conf.SanitizeHTML.sanitized_output_types = { + "text/html", + "text/markdown", + } + conf.SanitizeHTML.safe_output_keys = { + "metadata", + "text/plain", + "text/latex", + "application/json", + "image/png", + "image/jpg", + "image/jpeg", + "image/svg", + "image/svg+xml" + } + html_exporter = CustomHTMLExporter(config=conf) (body, resources) = html_exporter.from_notebook_node(notebook) diff --git a/rhodecode/tests/lib/test_markup_renderer.py b/rhodecode/tests/lib/test_markup_renderer.py --- a/rhodecode/tests/lib/test_markup_renderer.py +++ b/rhodecode/tests/lib/test_markup_renderer.py @@ -17,6 +17,7 @@ # RhodeCode Enterprise Edition, including its added features, Support services, # and proprietary license terms, please see https://rhodecode.com/licenses/ +import mock import pytest from rhodecode.lib.markup_renderer import ( @@ -778,8 +779,119 @@ def test_relative_links(src_html, expect """, "Hello, World!") ]) def test_jp_notebook_html_generation(notebook_source, expected_output): - import mock with mock.patch('rhodecode.lib.helpers.asset'): body = MarkupRenderer.jupyter(notebook_source) assert "" in body assert expected_output in body + + 
+@pytest.mark.parametrize("notebook_source, expected_output", [ + ({"cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 + }, 'No description has been provided for this image'), + ({ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "highlighter": "codemirror" + }, + "source": "
" + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "xrender": True + }, + "outputs": [ + { + "data": { + "text/html": [ + "