##// END OF EJS Templates
fix(jupyter): adapted to support more formats. Fixes: RCCE-38
ilin.s -
r5273:5af2b517 default
parent child Browse files
Show More
@@ -1,581 +1,581 b''
1
1
2
2
3 # Copyright (C) 2011-2023 RhodeCode GmbH
3 # Copyright (C) 2011-2023 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Renderer for markup languages with ability to parse using rst or markdown
23 Renderer for markup languages with ability to parse using rst or markdown
24 """
24 """
25
25
26 import re
26 import re
27 import os
27 import os
28 import lxml
28 import lxml
29 import logging
29 import logging
30 import urllib.parse
30 import urllib.parse
31 import pycmarkgfm
31 import pycmarkgfm
32
32
33 from mako.lookup import TemplateLookup
33 from mako.lookup import TemplateLookup
34 from mako.template import Template as MakoTemplate
34 from mako.template import Template as MakoTemplate
35
35
36 from docutils.core import publish_parts
36 from docutils.core import publish_parts
37 from docutils.parsers.rst import directives
37 from docutils.parsers.rst import directives
38 from docutils import writers
38 from docutils import writers
39 from docutils.writers import html4css1
39 from docutils.writers import html4css1
40 import markdown
40 import markdown
41
41
42 from rhodecode.lib.utils2 import safe_str, MENTIONS_REGEX
42 from rhodecode.lib.utils2 import safe_str, MENTIONS_REGEX
43
43
# Module-level logger used by the renderers defined in this file.
log = logging.getLogger(__name__)

# default renderer used to generate automated comments
DEFAULT_COMMENTS_RENDERER = 'rst'

# The lxml HTML helpers are optional: when they cannot be imported both names
# are bound to None so that callers can test for their availability.
try:
    from lxml.html import fromstring, tostring
except ImportError:
    log.exception('Failed to import lxml')
    fromstring = tostring = None
56
56
57
57
class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    HTML translator that sandboxes ``javascript:`` targets in reference
    links and tags literal blocks with the code-highlighting CSS classes.
    """

    def visit_literal_block(self, node):
        # open the <pre> element with the classes used for highlighted code
        opening_tag = self.starttag(node, 'pre', CLASS='codehilite literal-block')
        self.body.append(opening_tag)

    def visit_reference(self, node):
        if 'refuri' in node.attributes:
            target = node['refuri']
            # everything before the first colon is the would-be URI scheme
            scheme = target.lstrip().partition(':')[0] if ':' in target else None
            if scheme is not None and scheme.lower() == 'javascript':
                # we don't allow javascript type of refs...
                node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # delegate to the base translator through an explicit class call
        return html4css1.HTMLTranslator.visit_reference(self, node)
79
79
80
80
class RhodeCodeWriter(writers.html4css1.Writer):
    """docutils HTML writer that translates through ``CustomHTMLTranslator``."""

    def __init__(self):
        super().__init__()
        # swap in the sandboxing translator defined in this module
        self.translator_class = CustomHTMLTranslator
85
85
86
86
def relative_links(html_source, server_paths):
    """
    Rewrite link targets inside rendered HTML so they resolve against the
    server paths of the file being displayed.

    ``src`` of ``<img>``/``<video>`` elements is resolved against
    ``server_paths['raw']``. ``href`` of ``<a>`` elements that do not carry the
    ``gfm`` class is resolved against ``server_paths['raw']`` when it ends
    with ``?raw=1`` and against ``server_paths['standard']`` otherwise.

    :param html_source: HTML markup; returned unchanged when empty
    :param server_paths: mapping providing the ``'raw'`` and ``'standard'``
        request paths
    :return: the rewritten markup as text, or ``html_source`` untouched when
        lxml is unavailable or the markup cannot be parsed
    """
    if not html_source:
        return html_source

    # Both helpers are None when the guarded lxml import at module level
    # failed. Each name is tested explicitly: ``not fromstring and tostring``
    # binds as ``(not fromstring) and tostring`` and never triggered.
    if fromstring is None or tostring is None:
        return html_source

    try:
        doc = fromstring(html_source)
    except Exception:
        # markup that cannot be parsed is passed through as-is
        return html_source

    for el in doc.cssselect('img, video'):
        src = el.attrib.get('src')
        if src:
            el.attrib['src'] = relative_path(src, server_paths['raw'])

    for el in doc.cssselect('a:not(.gfm)'):
        href = el.attrib.get('href')
        if href:
            # links explicitly asking for raw content keep using the raw path
            path_kind = 'raw' if href.endswith('?raw=1') else 'standard'
            el.attrib['href'] = relative_path(href, server_paths[path_kind])

    return tostring(doc, encoding='unicode')
114
114
115
115
def relative_path(path, request_path, is_repo_file=None):
    """
    relative link support, path is a rel path, and request_path is current
    server path (not absolute)

    e.g.

    path = '../logo.png'
    request_path= '/repo/files/path/file.md'
    produces: '/repo/files/logo.png'

    :param path: link target found in the rendered document
    :param request_path: server path of the document being rendered
    :param is_repo_file: optional callable receiving ``request_path``; a
        truthy result means the request path points at a file, so resolution
        starts one level up. When omitted every request path counts as a file.
    :return: ``request_path`` when ``path`` is empty; ``path`` itself for
        data/javascript/mailto/tel/anchor links, links with a network
        location, or when ``request_path`` is empty; otherwise the resolved
        path with a single leading ``/``
    """
    # TODO(marcink): unicode/str support ?
    # maybe=> safe_str(urllib.quote(safe_str(final_path), '/:'))

    # assume default is a valid file path
    file_check = is_repo_file or (lambda _p: True)

    if not path:
        return request_path

    path = safe_str(path)
    request_path = safe_str(request_path)

    # skip data, anchor, invalid links. ``mailto:`` and ``tel:`` carry no
    # network location, so without this they would be rewritten as if they
    # were files inside the repository.
    passthrough_prefixes = ('data:', 'javascript:', 'mailto:', 'tel:', '#', ':')
    if path.startswith(passthrough_prefixes):
        return path

    # anything with a network location is an absolute URL
    if urllib.parse.urlparse(path).netloc:
        return path

    if not request_path:
        return path

    if path.startswith('/'):
        path = path[1:]

    if path.startswith('./'):
        path = path[2:]

    segments = request_path.split('/')

    # compute how deep we need to traverse the request_path; a request path
    # that is a VALID file contributes one level for the file name itself
    levels_up = 1 if file_check(request_path) else 0
    while path.startswith('../'):
        levels_up += 1
        path = path[3:]

    if levels_up > 0:
        segments = segments[:-levels_up]

    segments.append(path)
    return '/' + '/'.join(segments).lstrip('/')
177
177
178
178
# Shared ``markdown.Markdown`` instance, created on first use.
_cached_markdown_renderer = None


def get_markdown_renderer(extensions, output_format):
    """
    Return the shared ``markdown.Markdown`` renderer, creating it on first use.

    The instance is built once with ``extensions`` (plus ``legacy_attrs``) and
    ``output_format``; the arguments of later calls are ignored because the
    cached instance is returned.

    :param extensions: list of Markdown extension names
    :param output_format: output format name passed to ``markdown.Markdown``
    :return: the cached ``markdown.Markdown`` instance
    """
    global _cached_markdown_renderer

    if _cached_markdown_renderer is None:
        _cached_markdown_renderer = markdown.Markdown(
            extensions=extensions + ['legacy_attrs'],
            output_format=output_format)
    else:
        # A Markdown instance keeps parser state from the last ``convert``
        # call; reset it so the previous document cannot leak into the next.
        _cached_markdown_renderer.reset()
    return _cached_markdown_renderer
190
190
191
191
def get_markdown_renderer_flavored(extensions, output_format):
    """
    Dummy wrapper to mimic markdown API and render github HTML rendered

    The returned object exposes ``convert(source)``: the source is parsed as
    GitHub flavored markdown with hard breaks enabled, serialised back to
    CommonMark and then converted by the regular markdown renderer.
    """
    base_renderer = get_markdown_renderer(extensions, output_format)

    class GFM(object):
        def convert(self, source):
            gfm_options = pycmarkgfm.options.hardbreaks
            with pycmarkgfm.parse_gfm(source, options=gfm_options) as document:
                commonmark_text = document.to_commonmark()
                return base_renderer.convert(commonmark_text)

    return GFM()
206
206
207
207
class MarkupRenderer(object):
    """
    Renders markup sources to HTML.

    Renderers exist for markdown, reStructuredText, Jupyter notebooks and
    plain text; ``render`` picks one based on the file name.
    """
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    MENTION_PAT = re.compile(MENTIONS_REGEX)

    extensions = ['markdown.extensions.codehilite', 'markdown.extensions.extra',
                  'markdown.extensions.def_list', 'markdown.extensions.sane_lists']

    output_format = 'html4'

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        runs detection of what renderer should be used for generating html
        from a markup language

        :param source: markup source (not inspected by the detection)
        :param filename: file name whose extension selects the renderer;
            ``None`` or an empty name selects the plain renderer
        :return: the matching renderer classmethod of ``MarkupRenderer``
        """
        # ``filename`` defaults to None, which the regexes cannot search
        filename = filename or ''

        if MarkupRenderer.MARKDOWN_PAT.search(filename):
            detected_renderer = 'markdown'
        elif MarkupRenderer.RST_PAT.search(filename):
            detected_renderer = 'rst'
        elif MarkupRenderer.JUPYTER_PAT.search(filename):
            detected_renderer = 'jupyter'
        else:
            # plain README names and everything unrecognised render as plain
            detected_renderer = 'plain'

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def sanitize_html(cls, text):
        """Return ``text`` cleaned by the package's ``sanitize_html`` filter."""
        from .html_filters import sanitize_html
        return sanitize_html(text, markdown=True)

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.
        Returns None when no renderer can be detected.

        :param filename: file name to inspect
        :param exclude: list/tuple of extensions to ignore; any other value
            disables the filtering
        """
        def _filter(elements):
            if isinstance(exclude, (list, tuple)):
                return [x for x in elements if x not in exclude]
            return elements

        markdown_exts = _filter([ext for ext, _weight in cls.MARKDOWN_EXTS if ext])
        if filename.endswith(tuple(markdown_exts)):
            return 'markdown'

        rst_exts = _filter([ext for ext, _weight in cls.RST_EXTS if ext])
        if filename.endswith(tuple(rst_exts)):
            return 'rst'

        return None

    def render(self, source, filename=None):
        """
        Renders a given source using the renderer detected from ``filename``.
        Unrecognised names are rendered as plain text, where new lines are
        replaced with <br />.
        """
        renderer = self._detect_renderer(source, filename)
        return renderer(source)

    @classmethod
    def urlify_text(cls, text):
        """Wrap every http(s) URL found in ``text`` in an ``<a>`` element."""
        def url_func(match_obj):
            url_full = match_obj.groups()[0]
            return f'<a href="{url_full}">{url_full}</a>'

        return cls.URL_PAT.sub(url_func, text)

    @classmethod
    def convert_mentions(cls, text, mode):
        """
        Replace ``@username`` mentions with highlighted markup.

        :param text: source text to scan for mentions
        :param mode: ``'markdown'`` (hovercard ``<strong>`` element) or
            ``'rst'`` (bold text)
        :raises ValueError: for any other mode, once a mention is found
        :return: the converted text with surrounding whitespace stripped
        """
        mention_pat = cls.MENTION_PAT

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            hovercard_url = "pyroutes.url('hovercard_username', {'username': '%s'});" % uname

            if mode == 'markdown':
                tmpl = '<strong class="tooltip-hovercard" data-hovercard-alt="{uname}" data-hovercard-url="{hovercard_url}">@{uname}</strong>'
            elif mode == 'rst':
                tmpl = ' **@{uname}** '
            else:
                raise ValueError('mode must be rst or markdown')

            return tmpl.format(uname=uname, hovercard_url=hovercard_url)

        return mention_pat.sub(wrapp, text).strip()

    @classmethod
    def plain(cls, source, universal_newline=True, leading_newline=True):
        """
        Render ``source`` as plain text: URLs become links and new lines
        become ``<br />``; the result is sanitized.

        :param universal_newline: normalise all line endings to ``\\n`` first
        :param leading_newline: prefix the output with a ``<br />``
        """
        source = safe_str(source)
        if universal_newline:
            source = '\n'.join(source.splitlines())

        rendered_source = cls.urlify_text(source)
        prefix = '<br />' if leading_newline else ''
        html_text = prefix + rendered_source.replace("\n", '<br />')

        return cls.sanitize_html(html_text)

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False,
                 clean_html=True):
        """
        returns markdown rendered code, optionally cleaned by ``sanitize_html``

        :param safe: fall back to plain rendering when the renderer fails
            instead of raising
        :param flavored: parse the source as GitHub flavored markdown first
        :param mentions: highlight ``@username`` mentions before rendering
        :param clean_html: sanitize the rendered HTML
        """
        if mentions:
            mention_hl = cls.convert_mentions(source, mode='markdown')
            # we extracted mentions render with this using Mentions false;
            # clean_html is forwarded so the caller's choice is not lost
            return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                mentions=False, clean_html=clean_html)

        if flavored:
            markdown_renderer = get_markdown_renderer_flavored(
                cls.extensions, cls.output_format)
        else:
            markdown_renderer = get_markdown_renderer(
                cls.extensions, cls.output_format)

        try:
            rendered = markdown_renderer.convert(source)
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                rendered = cls.plain(source)
            else:
                raise

        if clean_html:
            rendered = cls.sanitize_html(rendered)
        return rendered

    @classmethod
    def rst(cls, source, safe=True, mentions=False, clean_html=False):
        """
        Render reStructuredText ``source`` to an HTML fragment, prefixed with
        the document's ``html_title`` part.

        :param safe: fall back to plain rendering when docutils fails instead
            of raising
        :param mentions: highlight ``@username`` mentions before rendering
        :param clean_html: sanitize the rendered fragment
        """
        if mentions:
            mention_hl = cls.convert_mentions(source, mode='rst')
            # we extracted mentions render with this using Mentions false;
            # clean_html is forwarded so the caller's choice is not lost
            return cls.rst(mention_hl, safe=safe, mentions=False,
                           clean_html=clean_html)

        source = safe_str(source)
        try:
            docutils_settings = {
                alias: None
                for alias in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES}

            docutils_settings.update({
                'input_encoding': 'unicode',
                'report_level': 4,
                'syntax_highlight': 'short',
            })

            # NOTE(review): this registers every settings key as a directive,
            # including the three non-directive settings above - confirm
            # whether only the disallowed directives were meant.
            for k, v in list(docutils_settings.items()):
                directives.register_directive(k, v)

            parts = publish_parts(source=source,
                                  writer=RhodeCodeWriter(),
                                  settings_overrides=docutils_settings)
            rendered = parts["fragment"]
            if clean_html:
                rendered = cls.sanitize_html(rendered)
            return parts['html_title'] + rendered
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallback to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def jupyter(cls, source, safe=True):
        """
        Render a Jupyter notebook ``source`` to HTML.

        Notebooks that fail validation are replaced by an "Invalid Notebook!"
        message listing the validation errors.

        :param source: notebook document as text
        :param safe: when true, markdown cells are sanitized and
            ``application/javascript`` outputs are replaced by a placeholder
        :return: HTML markup
        """
        from html import escape as html_escape

        from rhodecode.lib import helpers
        from .html_sanitizer_defs import markdown_attrs, all_tags, all_styles

        from traitlets import default, config
        import nbformat
        from nbconvert import HTMLExporter
        from nbconvert.preprocessors import Preprocessor
        from nbconvert.preprocessors.sanitize import SanitizeHTML

        class CustomHTMLExporter(HTMLExporter):

            @default("template_file")
            def _template_file_default(self):
                if self.template_extension:
                    return "basic/index" + self.template_extension

        class Sandbox(Preprocessor):

            def preprocess_cell(self, cell, resources, cell_index):
                if not safe:
                    return cell, resources
                sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                if cell.cell_type == "markdown":
                    cell.source = cls.sanitize_html(cell.source)
                    return cell, resources

                # not every cell type carries outputs
                for cell_output in cell.get('outputs', []):
                    if 'data' in cell_output:
                        if 'application/javascript' in cell_output['data']:
                            cell_output['data']['text/plain'] = sandbox_text
                            cell_output['data'].pop('application/javascript', None)
                return cell, resources

        # NOTE(review): this helper is not called anywhere in this method.
        def _sanitize_resources(input_resources):
            """
            Skip/sanitize some of the CSS generated and included in jupyter
            so it doesn't mess up UI so much
            """

            # TODO(marcink): probably we should replace this with whole custom
            # CSS set that doesn't screw up, but jupyter generated html has some
            # special markers, so it requires Custom HTML exporter template with
            # _default_template_path_default, to achieve that

            # strip the reset CSS
            input_resources[0] = input_resources[0][input_resources[0].find('/*! Source'):]
            return input_resources

        def as_html(notebook):
            conf = config.Config()
            # TODO: Keep an eye on the order of preprocessors
            conf.CustomHTMLExporter.default_preprocessors = [Sandbox, SanitizeHTML]
            conf.Sandbox.enabled = True
            conf.SanitizeHTML.enabled = True
            conf.SanitizeHTML.attributes = markdown_attrs
            conf.SanitizeHTML.tags = all_tags
            conf.SanitizeHTML.styles = all_styles
            conf.SanitizeHTML.sanitized_output_types = {
                "text/html",
                "text/markdown",
            }
            # every entry needs its own comma: adjacent string literals are
            # concatenated into a single (bogus) key
            conf.SanitizeHTML.safe_output_keys = {
                "metadata",
                "text/plain",
                "text/latex",
                "application/json",
                "image/png",
                "image/jpg",
                "image/jpeg",
                "image/svg",
                "image/svg+xml",
            }

            html_exporter = CustomHTMLExporter(config=conf)

            (body, resources) = html_exporter.from_notebook_node(notebook)

            header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
            js = MakoTemplate(r'''
            <!-- MathJax configuration -->
            <script type="text/x-mathjax-config">
            MathJax.Hub.Config({
                jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                TeX: {
                    extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                },
                tex2jax: {
                    inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                    displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                    processEscapes: true,
                    processEnvironments: true
                },
                // Center justify equations in code and markdown cells. Elsewhere
                // we use CSS to left justify single line equations in code cells.
                displayAlign: 'center',
                "HTML-CSS": {
                    styles: {'.MathJax_Display': {"margin": 0}},
                    linebreaks: { automatic: true },
                    availableFonts: ["STIX", "TeX"]
                },
                showMathMenu: false
            });
            </script>
            <!-- End of MathJax configuration -->
            <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
            ''').render(h=helpers)

            css = MakoTemplate(r'''
            <link rel="stylesheet" type="text/css" href="${h.asset('css/style-ipython.css', ver=ver)}" media="screen"/>
            ''').render(h=helpers, ver='ver1')

            body = '\n'.join([header, css, js, body])
            return body, resources

        captured_errors = {}
        error_body = """
            <div style="text-align: center;">
                <h3>Invalid Notebook!</h3>
                <p>{}</p>
            </div>
        """
        # TODO: In the event of a newer jupyter notebook version, consider increasing the as_version parameter
        notebook = nbformat.reads(source, as_version=4, capture_validation_error=captured_errors)
        if captured_errors:
            # error text is derived from the notebook itself, so it is escaped
            # before being placed into the HTML message
            error_messages = '<br>'.join(
                html_escape(str(error)) for error in captured_errors.values())
            body = error_body.format(error_messages)
        else:
            try:
                body, _ = as_html(notebook)
            except (AttributeError, nbformat.ValidationError):
                # tell an invalid notebook apart from a genuine rendering bug:
                # only the former is turned into an error message
                try:
                    nbformat.validate(nbformat.reader.reads(source))
                except nbformat.ValidationError as exc:
                    body = error_body.format(html_escape(str(exc)))
                else:
                    raise
        return body
564
564
565
565
class RstTemplateRenderer(object):
    """
    Renders the Mako templates stored in the ``templates/rst_templates``
    directory located one level above this module's directory.
    """

    def __init__(self):
        package_root = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
        templates_dir = os.path.join(package_root, 'templates', 'rst_templates')
        self.template_store = TemplateLookup(
            directories=[templates_dir],
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """Look ``templatename`` up in the template store."""
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """Render ``template_name`` with ``kwargs`` as the template context."""
        return self._get_template(template_name).render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now