##// END OF EJS Templates
jupyter: sanitize markdown cells similarly to how we do for our own markdown cleanup.
marcink -
r3148:fb1dc128 default
parent child Browse files
Show More
@@ -1,526 +1,534 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2018 RhodeCode GmbH
3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Renderer for markup languages with ability to parse using rst or markdown
23 Renderer for markup languages with ability to parse using rst or markdown
24 """
24 """
25
25
26 import re
26 import re
27 import os
27 import os
28 import lxml
28 import lxml
29 import logging
29 import logging
30 import urlparse
30 import urlparse
31 import bleach
31 import bleach
32
32
33 from mako.lookup import TemplateLookup
33 from mako.lookup import TemplateLookup
34 from mako.template import Template as MakoTemplate
34 from mako.template import Template as MakoTemplate
35
35
36 from docutils.core import publish_parts
36 from docutils.core import publish_parts
37 from docutils.parsers.rst import directives
37 from docutils.parsers.rst import directives
38 from docutils import writers
38 from docutils import writers
39 from docutils.writers import html4css1
39 from docutils.writers import html4css1
40 import markdown
40 import markdown
41
41
42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
43 from rhodecode.lib.utils2 import (
43 from rhodecode.lib.utils2 import (
44 safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)
44 safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)
45
45
# module level logger, used by the renderers below to report failures
log = logging.getLogger(__name__)

# default renderer used to generate automated comments
DEFAULT_COMMENTS_RENDERER = 'rst'
50
50
51
51
class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    Custom HTML Translator used for sandboxing potential
    JS injections in ref links
    """

    # Spaces and C0 control characters. Browsers strip these from the start
    # of a URL and drop embedded tabs/newlines, so e.g. ``java\tscript:`` is
    # still treated as a ``javascript:`` URL and must be caught as well.
    _SCHEME_NOISE = re.compile(r'[\x00-\x20]+')

    def visit_reference(self, node):
        """
        Neutralize ``javascript:`` reference targets before the node is
        rendered by the stock html4css1 translator.

        :param node: docutils reference node; its ``refuri`` attribute is
            overwritten in-place when it uses the javascript scheme
        """
        if 'refuri' in node.attributes:
            refuri = node['refuri']
            if ':' in refuri:
                scheme = refuri.split(':', 1)[0]
                # normalize the scheme the way a browser would read it,
                # a plain lstrip() misses control characters and tabs
                scheme = self._SCHEME_NOISE.sub('', scheme).lower()

                if scheme == 'javascript':
                    # we don't allow javascript type of refs...
                    node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # old style class requires this...
        return html4css1.HTMLTranslator.visit_reference(self, node)
71
71
72
72
class RhodeCodeWriter(writers.html4css1.Writer):
    """
    html4css1 based writer which renders documents using the
    CustomHTMLTranslator instead of the stock translator
    """

    def __init__(self):
        # initialize through the generic docutils Writer, then plug in the
        # sandboxing translator class
        writers.Writer.__init__(self)
        self.translator_class = CustomHTMLTranslator
77
77
78
78
def relative_links(html_source, server_paths):
    """
    Rewrite ``src``/``href`` attributes of a rendered HTML fragment using
    `relative_path`.

    ``img``/``video`` sources and links ending with ``?raw=1`` are resolved
    against ``server_paths['raw']``; every other link, except anchors with
    the ``gfm`` css class, is resolved against ``server_paths['standard']``.

    :param html_source: HTML markup to process. Returned untouched when it
        is empty, cannot be parsed, or when lxml cannot be imported.
    :param server_paths: mapping with ``raw`` and ``standard`` keys holding
        the server paths used as a base for the links
    :return: serialized HTML with rewritten links
    """
    if not html_source:
        return html_source

    try:
        from lxml.html import fromstring
        from lxml.html import tostring
    except ImportError:
        log.exception('Failed to import lxml')
        return html_source

    try:
        doc = fromstring(html_source)
    except Exception:
        # best effort only, markup we cannot parse is returned as-is
        log.debug('Failed to parse HTML for relative links', exc_info=True)
        return html_source

    for el in doc.cssselect('img, video'):
        src = el.attrib.get('src')
        if src:
            el.attrib['src'] = relative_path(src, server_paths['raw'])

    for el in doc.cssselect('a:not(.gfm)'):
        href = el.attrib.get('href')
        if href:
            # links explicitly asking for raw content use the raw base path
            path_key = 'raw' if href.endswith('?raw=1') else 'standard'
            el.attrib['href'] = relative_path(href, server_paths[path_key])

    return tostring(doc)
110
110
111
111
def relative_path(path, request_path, is_repo_file=None):
    """
    relative link support, path is a rel path, and request_path is current
    server path (not absolute)

    e.g.

    path = '../logo.png'
    request_path= '/repo/files/path/file.md'
    produces: '/repo/files/logo.png'

    :param path: link target found in the rendered document
    :param request_path: server path the link is resolved against
    :param is_repo_file: optional callable taking `request_path`; a true
        result means `request_path` points at a file, so resolution starts
        one level up. Without it every request path is assumed to be a file.
    :return: `request_path` when `path` is empty; `path` itself for data/
        javascript/anchor/invalid links, links with a network location, or
        when `request_path` is empty; otherwise the resolved path with a
        leading slash
    """
    # TODO(marcink): unicode/str support ?
    # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))

    # assume default is a valid file path
    file_check = is_repo_file or (lambda p: True)

    if not path:
        return request_path

    path = safe_unicode(path)
    request_path = safe_unicode(request_path)

    # skip data, anchor, invalid links
    skipped_prefixes = (u'data:', u'javascript:', u'#', u':')
    if path.startswith(skipped_prefixes):
        return path

    # anything with a network location is already absolute
    if urlparse.urlparse(path).netloc:
        return path

    if not request_path:
        return path

    # drop a single leading '/', and after that a single leading './'
    for prefix in (u'/', u'./'):
        if path.startswith(prefix):
            path = path[len(prefix):]

    # compute how deep we need to traverse the request_path, a VALID file
    # as the request path already means one level up
    levels_up = 1 if file_check(request_path) else 0
    while path.startswith(u'../'):
        levels_up += 1
        path = path[3:]

    segments = request_path.split('/')
    if levels_up > 0:
        segments = segments[:-levels_up]
    segments.append(path)

    return u'/' + u'/'.join(segments).lstrip(u'/')
173
173
174
174
class MarkupRenderer(object):
    """
    Renders markup sources (markdown, rst, jupyter notebooks, plain text)
    into HTML. The renderer is picked based on the file name, or the
    individual renderers can be called directly as class methods.
    """
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
    output_format = 'html4'

    # NOTE: both renderer instances are created once and shared by all calls
    markdown_renderer = markdown.Markdown(
        extensions, enable_attributes=False, output_format=output_format)

    markdown_renderer_flavored = markdown.Markdown(
        extensions + [GithubFlavoredMarkdownExtension()],
        enable_attributes=False, output_format=output_format)

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        runs detection of what renderer should be used for generating html
        from a markup language

        Detection is based only on the extension of `filename`; a missing
        filename, or one that matches no known extension, results in the
        plain renderer.

        :param source: not used for the detection
        :param filename: name of the rendered file, or None
        :return: one of the renderer class methods of MarkupRenderer
        """
        # render() passes filename=None by default, regexes need a string
        filename = filename or ''

        if MarkupRenderer.MARKDOWN_PAT.search(filename):
            detected_renderer = 'markdown'
        elif MarkupRenderer.RST_PAT.search(filename):
            detected_renderer = 'rst'
        elif MarkupRenderer.JUPYTER_PAT.search(filename):
            detected_renderer = 'jupyter'
        else:
            # extension-less readme files as well as everything else
            detected_renderer = 'plain'

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def bleach_clean(cls, text):
        """
        Sanitize `text` with bleach using the markdown tag/attribute
        whitelists.

        :param text: HTML markup to sanitize
        :return: sanitized markup, or the literal 'UNPARSEABLE TEXT' when
            bleach fails to process the input
        """
        from .bleach_whitelist import markdown_attrs, markdown_tags
        allowed_tags = markdown_tags
        allowed_attrs = markdown_attrs

        try:
            return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
        except Exception:
            # keep the placeholder result, but leave a trace of the failure
            log.exception('Failed to sanitize text using bleach')
            return 'UNPARSEABLE TEXT'

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.
        Returns None when no renderer can be detected.

        :param filename: file name to check the extension of
        :param exclude: list/tuple of extensions that should be ignored,
            values of any other type disable the filtering
        """
        def _filter(elements):
            if isinstance(exclude, (list, tuple)):
                return [x for x in elements if x not in exclude]
            return elements

        if filename.endswith(
                tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):
            return 'markdown'
        if filename.endswith(tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):
            return 'rst'

        return None

    def render(self, source, filename=None):
        """
        Renders a given source using a renderer detected from the file
        extension, falling back to the plain renderer.

        :param source: markup source to render
        :param filename: name of the file the source comes from, or None
        """

        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown

        Pre-processes the markdown source: ``<pre>`` blocks are taken out
        before the underscore escaping runs and are put back afterwards.

        :param text: markdown source
        """

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            digest = md5_safe(matchobj.group(0))
            extractions[digest] = matchobj.group(0)
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            return '\n\n' + extractions[matchobj.group(1)]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def urlify_text(cls, text):
        """
        Wrap every http(s) url found in `text` in an ``<a>`` tag.
        """
        def url_func(match_obj):
            url_full = match_obj.groups()[0]
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

        return cls.URL_PAT.sub(url_func, text)

    @classmethod
    def plain(cls, source, universal_newline=True, leading_newline=True):
        """
        Render `source` as plain text: urls become links and new lines
        become ``<br />`` tags.

        :param source: text to render
        :param universal_newline: normalize all line endings to ``\\n`` first
        :param leading_newline: prepend the output with a ``<br />``
        """
        source = safe_unicode(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        rendered_source = cls.urlify_text(source)
        source = ''
        if leading_newline:
            source += '<br />'
        source += rendered_source.replace("\n", '<br />')
        return source

    @classmethod
    def _highlight_mentions(cls, source):
        """
        Wrap every @mention found in `source` in ``**``, which is bold/strong
        markup in both markdown and rst.
        """
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}

        return mention_pat.sub(wrapp, source).strip()

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False,
                 clean_html=True):
        """
        returns markdown rendered code cleaned by the bleach library

        :param source: markdown source
        :param safe: on rendering errors fall back to the plain renderer
            instead of raising
        :param flavored: use the Github flavored renderer
        :param mentions: highlight @mentions before rendering
        :param clean_html: sanitize the rendered HTML using `bleach_clean`
        """

        if flavored:
            markdown_renderer = cls.markdown_renderer_flavored
        else:
            markdown_renderer = cls.markdown_renderer

        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false,
            # the sanitizing choice of the caller has to be kept
            return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                mentions=False, clean_html=clean_html)

        source = safe_unicode(source)

        try:
            if flavored:
                source = cls._flavored_markdown(source)
            # the renderer instance is shared between calls, drop state
            # (e.g. link references) left over from a previous document
            markdown_renderer.reset()
            rendered = markdown_renderer.convert(source)
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                rendered = cls.plain(source)
            else:
                raise

        if clean_html:
            rendered = cls.bleach_clean(rendered)
        return rendered

    @classmethod
    def rst(cls, source, safe=True, mentions=False, clean_html=False):
        """
        Render reStructuredText into HTML, with the directives listed in
        RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES disabled.

        :param source: rst source
        :param safe: on rendering errors fall back to the plain renderer
            instead of raising
        :param mentions: highlight @mentions before rendering
        :param clean_html: sanitize the rendered fragment using
            `bleach_clean`; the html title part is not passed through it
        """
        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false,
            # the sanitizing choice of the caller has to be kept
            return cls.rst(mention_hl, safe=safe, mentions=False,
                           clean_html=clean_html)

        source = safe_unicode(source)
        try:
            # registering None under a directive name disables the directive
            for directive_name in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(directive_name, None)

            # settings are kept apart from the directives, so setting names
            # never end up registered as (broken) directives
            docutils_settings = {
                'input_encoding': 'unicode', 'report_level': 4}

            parts = publish_parts(source=source,
                                  writer=RhodeCodeWriter(),
                                  settings_overrides=docutils_settings)
            rendered = parts["fragment"]
            if clean_html:
                rendered = cls.bleach_clean(rendered)
            return parts['html_title'] + rendered
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallbacking to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def jupyter(cls, source, safe=True):
        """
        Render a jupyter notebook into HTML, together with the inlined CSS
        and the MathJax configuration/loader.

        :param source: notebook source, read using nbformat as version 4
        :param safe: sandbox javascript outputs and sanitize the source of
            markdown cells
        """
        from rhodecode.lib import helpers

        from traitlets.config import Config
        import nbformat
        from nbconvert import HTMLExporter
        from nbconvert.preprocessors import Preprocessor

        class CustomHTMLExporter(HTMLExporter):
            def _template_file_default(self):
                return 'basic'

        class Sandbox(Preprocessor):

            def preprocess(self, nb, resources):
                sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                if not safe:
                    # nothing gets sandboxed nor sanitized in unsafe mode
                    return nb, resources

                for cell in nb['cells']:
                    if 'outputs' in cell:
                        for cell_output in cell['outputs']:
                            if 'data' in cell_output:
                                if 'application/javascript' in cell_output['data']:
                                    cell_output['data']['text/plain'] = sandbox_text
                                    cell_output['data'].pop('application/javascript', None)

                    if 'source' in cell and cell['cell_type'] == 'markdown':
                        # sanitize similar like in markdown
                        cell['source'] = cls.bleach_clean(cell['source'])

                return nb, resources

        def _sanitize_resources(resources):
            """
            Skip/sanitize some of the CSS generated and included in jupyter
            so it doesn't mess up the UI so much
            """

            # TODO(marcink): probably we should replace this with whole custom
            # CSS set that doesn't screw up, but jupyter generated html has some
            # special markers, so it requires Custom HTML exporter template with
            # _default_template_path_default, to achieve that

            if not resources:
                return resources

            # strip the reset CSS, which is everything before the marker.
            # Without the marker the CSS is kept as it is, slicing from the
            # -1 returned by find() would leave just the last character
            marker_pos = resources[0].find('/*! Source')
            if marker_pos != -1:
                resources[0] = resources[0][marker_pos:]
            return resources

        def as_html(notebook):
            conf = Config()
            conf.CustomHTMLExporter.preprocessors = [Sandbox]
            html_exporter = CustomHTMLExporter(config=conf)

            (body, resources) = html_exporter.from_notebook_node(notebook)
            header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
            js = MakoTemplate(r'''
            <!-- Load mathjax -->
                <!-- MathJax configuration -->
                <script type="text/x-mathjax-config">
                MathJax.Hub.Config({
                    jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                    extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                    TeX: {
                        extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                    },
                    tex2jax: {
                        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                        processEscapes: true,
                        processEnvironments: true
                    },
                    // Center justify equations in code and markdown cells. Elsewhere
                    // we use CSS to left justify single line equations in code cells.
                    displayAlign: 'center',
                    "HTML-CSS": {
                        styles: {'.MathJax_Display': {"margin": 0}},
                        linebreaks: { automatic: true },
                        availableFonts: ["STIX", "TeX"]
                    },
                    showMathMenu: false
                });
                </script>
                <!-- End of mathjax configuration -->
                <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
            ''').render(h=helpers)

            css = '<style>{}</style>'.format(
                ''.join(_sanitize_resources(resources['inlining']['css'])))

            body = '\n'.join([header, css, js, body])
            return body, resources

        notebook = nbformat.reads(source, as_version=4)
        (body, resources) = as_html(notebook)
        return body
509
517
510
518
class RstTemplateRenderer(object):
    """
    Renders Mako templates looked up in the ``templates/rst_templates``
    directory, located one level above the directory of this module.
    """

    def __init__(self):
        module_dir = os.path.dirname(__file__)
        base = os.path.abspath(os.path.dirname(module_dir))
        rst_template_dirs = [os.path.join(base, 'templates', 'rst_templates')]
        # helpers are made available inside the templates as `h`
        self.template_store = TemplateLookup(
            directories=rst_template_dirs,
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """
        Fetch the template named `templatename` from the template store.
        """
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """
        Render the template named `template_name`, passing `kwargs` to it.
        """
        template = self._get_template(template_name)
        return template.render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now