##// END OF EJS Templates
security: sanitize plaintext renderer with bleach.
marcink -
r3485:80e2c96a default
parent child Browse files
Show More
@@ -1,557 +1,559 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2019 RhodeCode GmbH
3 # Copyright (C) 2011-2019 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Renderer for markup languages with ability to parse using rst or markdown
23 Renderer for markup languages with ability to parse using rst or markdown
24 """
24 """
25
25
26 import re
26 import re
27 import os
27 import os
28 import lxml
28 import lxml
29 import logging
29 import logging
30 import urlparse
30 import urlparse
31 import bleach
31 import bleach
32
32
33 from mako.lookup import TemplateLookup
33 from mako.lookup import TemplateLookup
34 from mako.template import Template as MakoTemplate
34 from mako.template import Template as MakoTemplate
35
35
36 from docutils.core import publish_parts
36 from docutils.core import publish_parts
37 from docutils.parsers.rst import directives
37 from docutils.parsers.rst import directives
38 from docutils import writers
38 from docutils import writers
39 from docutils.writers import html4css1
39 from docutils.writers import html4css1
40 import markdown
40 import markdown
41
41
42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
43 from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX)
43 from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX)
44
44
# module level logger
log = logging.getLogger(__name__)

# default renderer used to generate automated comments
DEFAULT_COMMENTS_RENDERER = 'rst'
49
49
50
50
class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    Custom HTML Translator used for sandboxing potential
    JS injections in ref links
    """

    def visit_reference(self, node):
        """
        Replace ``javascript:`` reference targets with a harmless
        placeholder, then delegate to the stock html4css1 translator.

        :param node: docutils reference node; only its ``refuri``
            attribute is inspected and possibly rewritten
        """
        if 'refuri' in node.attributes:
            refuri = node['refuri']
            if ':' in refuri:
                scheme = refuri.split(':', 1)[0]
                # URL parsers strip leading control/space characters and
                # drop tabs/newlines, so e.g. "java\tscript:" must be
                # treated the same as "javascript:"
                scheme = u''.join(
                    ch for ch in scheme if ord(ch) > 0x20).lower()

                if scheme == 'javascript':
                    # we don't allow javascript type of refs...
                    node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # old style class requires this...
        return html4css1.HTMLTranslator.visit_reference(self, node)
70
70
71
71
class RhodeCodeWriter(writers.html4css1.Writer):
    """
    html4css1 writer that uses CustomHTMLTranslator as its translator class
    """

    def __init__(self):
        # initialize through the generic Writer and then set our own
        # translator class
        writers.Writer.__init__(self)
        self.translator_class = CustomHTMLTranslator
76
76
77
77
def relative_links(html_source, server_paths):
    """
    Rewrite ``src`` of img/video tags and ``href`` of anchors (except those
    with the ``gfm`` class) found in `html_source` using `relative_path`.

    :param html_source: html markup to process; returned unchanged when
        empty, when lxml is not importable or when it cannot be parsed
    :param server_paths: mapping with ``raw`` and ``standard`` keys, each
        holding the request path passed to `relative_path`
    :returns: serialized html as produced by ``lxml.html.tostring``
    """
    if not html_source:
        return html_source

    try:
        # import the submodule explicitly, a bare `import lxml` does not
        # guarantee that `lxml.html` is loaded
        from lxml.html import fromstring, tostring
    except ImportError:
        log.exception('Failed to import lxml')
        return html_source

    try:
        doc = fromstring(html_source)
    except Exception:
        log.debug('Failed to parse html source, skipping relative links',
                  exc_info=True)
        return html_source

    for el in doc.cssselect('img, video'):
        src = el.attrib.get('src')
        if src:
            el.attrib['src'] = relative_path(src, server_paths['raw'])

    for el in doc.cssselect('a:not(.gfm)'):
        href = el.attrib.get('href')
        if href:
            # links explicitly asking for raw content stay on the raw path
            path_type = 'raw' if href.endswith('?raw=1') else 'standard'
            el.attrib['href'] = relative_path(href, server_paths[path_type])

    return tostring(doc)
109
109
110
110
def relative_path(path, request_path, is_repo_file=None):
    """
    relative link support, path is a rel path, and request_path is current
    server path (not absolute)

    e.g.

    path = '../logo.png'
    request_path= '/repo/files/path/file.md'
    produces: '/repo/files/logo.png'

    :param path: link target found in the rendered document
    :param request_path: server path of the document being rendered
    :param is_repo_file: optional callable taking `request_path` and
        telling if it points to a file; when not given every request_path
        is assumed to be a file
    """
    # TODO(marcink): unicode/str support ?
    # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))

    def dummy_check(p):
        return True  # assume default is a valid file path

    is_repo_file = is_repo_file or dummy_check
    if not path:
        return request_path

    path = safe_unicode(path)
    request_path = safe_unicode(request_path)

    if path.startswith(
            (u'data:', u'javascript:', u'mailto:', u'#', u':')):
        # skip data, mailto, anchor, invalid links. mailto has no netloc so
        # it would otherwise be mistaken for a path relative to the repo
        return path

    is_absolute = bool(urlparse.urlparse(path).netloc)
    if is_absolute:
        return path

    if not request_path:
        return path

    if path.startswith(u'/'):
        path = path[1:]

    if path.startswith(u'./'):
        path = path[2:]

    parts = request_path.split('/')
    # compute how deep we need to traverse the request_path
    depth = 0

    if is_repo_file(request_path):
        # if request path is a VALID file, we use a relative path with
        # one level up
        depth += 1

    while path.startswith(u'../'):
        depth += 1
        path = path[3:]

    if depth > 0:
        parts = parts[:-depth]

    parts.append(path)
    final_path = u'/'.join(parts).lstrip(u'/')

    return u'/' + final_path
172
172
173
173
_cached_markdown_renderer = None


def get_markdown_renderer(extensions, output_format):
    """
    Return the module wide cached markdown.Markdown instance, creating it
    on first use.

    NOTE: the instance is built only once, `extensions` and `output_format`
    passed on later calls are ignored.

    :param extensions: list of markdown extensions used to build the renderer
    :param output_format: output format passed to markdown.Markdown
    """
    global _cached_markdown_renderer

    if _cached_markdown_renderer is None:
        _cached_markdown_renderer = markdown.Markdown(
            extensions=extensions,
            enable_attributes=False, output_format=output_format)
    else:
        # a reused instance keeps state from the previous document, it needs
        # to be reset before converting another one
        _cached_markdown_renderer.reset()
    return _cached_markdown_renderer
185
185
186
186
_cached_markdown_renderer_flavored = None


def get_markdown_renderer_flavored(extensions, output_format):
    """
    Return the module wide cached markdown.Markdown instance that has
    GithubFlavoredMarkdownExtension added, creating it on first use.

    NOTE: the instance is built only once, `extensions` and `output_format`
    passed on later calls are ignored.

    :param extensions: list of markdown extensions used to build the renderer
    :param output_format: output format passed to markdown.Markdown
    """
    global _cached_markdown_renderer_flavored

    if _cached_markdown_renderer_flavored is None:
        _cached_markdown_renderer_flavored = markdown.Markdown(
            extensions=extensions + [GithubFlavoredMarkdownExtension()],
            enable_attributes=False, output_format=output_format)
    else:
        # a reused instance keeps state from the previous document, it needs
        # to be reset before converting another one
        _cached_markdown_renderer_flavored.reset()
    return _cached_markdown_renderer_flavored
198
198
199
199
class MarkupRenderer(object):
    """
    Renders markdown, rst, jupyter notebook and plain text sources to html
    """
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    extensions = ['markdown.extensions.codehilite', 'markdown.extensions.extra',
                  'markdown.extensions.def_list', 'markdown.extensions.sane_lists']

    output_format = 'html4'

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        runs detection of what renderer should be used for generating html
        from a markup language

        Detection is based on the filename extension only, anything that is
        not recognized (including a missing filename) uses the plain renderer.

        :param source: not used for detection
        :param filename: name of the file to pick the renderer for
        """
        # `filename` defaults to None which the regexes can't handle
        filename = filename or ''

        if MarkupRenderer.MARKDOWN_PAT.search(filename):
            detected_renderer = 'markdown'
        elif MarkupRenderer.RST_PAT.search(filename):
            detected_renderer = 'rst'
        elif MarkupRenderer.JUPYTER_PAT.search(filename):
            detected_renderer = 'jupyter'
        else:
            # readme files without extension and everything unknown
            detected_renderer = 'plain'

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def bleach_clean(cls, text):
        """
        Sanitize html in `text` with bleach, using the markdown tags and
        attributes whitelist. Returns 'UNPARSEABLE TEXT' when bleach fails.

        :param text: html to sanitize
        """
        from .bleach_whitelist import markdown_attrs, markdown_tags
        allowed_tags = markdown_tags
        allowed_attrs = markdown_attrs

        try:
            return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
        except Exception:
            log.exception('Error when sanitizing text using bleach')
            return 'UNPARSEABLE TEXT'

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.
        Returns None when no renderer can be detected.

        :param filename: name of the file to check
        :param exclude: list or tuple of extensions to skip, any other value
            disables the filtering
        """
        def _filter(elements):
            if isinstance(exclude, (list, tuple)):
                return [x for x in elements if x not in exclude]
            return elements

        markdown_exts = [ext for ext, _weight in cls.MARKDOWN_EXTS if ext]
        if filename.endswith(tuple(_filter(markdown_exts))):
            return 'markdown'

        rst_exts = [ext for ext, _weight in cls.RST_EXTS if ext]
        if filename.endswith(tuple(_filter(rst_exts))):
            return 'rst'

        return None

    def render(self, source, filename=None):
        """
        Renders a given source using renderer detected from the filename,
        the plain renderer is used when nothing else matches.

        :param source: markup text to render
        :param filename: name of the file the source comes from
        """

        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown

        :param text:
        """

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            digest = md5_safe(matchobj.group(0))
            extractions[digest] = matchobj.group(0)
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        # NOTE(review): pattern is anchored with ^ and used without
        # re.MULTILINE, so it can match only at the very start of the text,
        # confirm if that is intended
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            return '\n\n' + extractions[matchobj.group(1)]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def urlify_text(cls, text):
        """
        Wrap every http/https url found in `text` into an html anchor

        :param text: text to search for urls
        """
        def url_func(match_obj):
            url_full = match_obj.groups()[0]
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

        return cls.URL_PAT.sub(url_func, text)

    @classmethod
    def _highlight_mentions(cls, source):
        """
        Return stripped `source` with each MENTIONS_REGEX match replaced by
        a bold ``**@name**`` marker
        """
        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}

        return re.compile(MENTIONS_REGEX).sub(wrapp, source).strip()

    @classmethod
    def plain(cls, source, universal_newline=True, leading_newline=True):
        """
        Render plain text as html: urls become links, new lines become
        ``<br />`` and the result is sanitized with bleach.

        :param source: text to render
        :param universal_newline: normalize all line endings to ``\\n`` first
        :param leading_newline: prefix the output with a ``<br />``
        """
        source = safe_unicode(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        rendered_source = cls.urlify_text(source)
        source = ''
        if leading_newline:
            source += '<br />'
        source += rendered_source.replace("\n", '<br />')

        # the input is not escaped anywhere above, sanitize the final html
        rendered = cls.bleach_clean(source)
        return rendered

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False,
                 clean_html=True):
        """
        returns markdown rendered code cleaned by the bleach library

        :param source: markdown text to render
        :param safe: on rendering errors fall back to the plain renderer
            instead of raising
        :param flavored: use the github flavored renderer
        :param mentions: highlight @mentions before rendering
        :param clean_html: sanitize the rendered html with bleach
        """

        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false,
            # all the other options have to be passed along unchanged
            return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                mentions=False, clean_html=clean_html)

        if flavored:
            markdown_renderer = get_markdown_renderer_flavored(
                cls.extensions, cls.output_format)
        else:
            markdown_renderer = get_markdown_renderer(
                cls.extensions, cls.output_format)

        source = safe_unicode(source)

        try:
            if flavored:
                source = cls._flavored_markdown(source)
            rendered = markdown_renderer.convert(source)
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                rendered = cls.plain(source)
            else:
                raise

        if clean_html:
            rendered = cls.bleach_clean(rendered)
        return rendered

    @classmethod
    def rst(cls, source, safe=True, mentions=False, clean_html=False):
        """
        returns rst rendered code, optionally cleaned by the bleach library

        :param source: rst text to render
        :param safe: on rendering errors fall back to the plain renderer
            instead of raising
        :param mentions: highlight @mentions before rendering
        :param clean_html: sanitize the rendered fragment with bleach, the
            html title part is added after that step
        """
        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false,
            # clean_html has to be passed along or it's silently disabled
            return cls.rst(mention_hl, safe=safe, mentions=False,
                           clean_html=clean_html)

        source = safe_unicode(source)
        try:
            # only the directive names get (un)registered, settings keys
            # must not end up in the directives registry
            for directive_name in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(directive_name, None)

            docutils_settings = {
                'input_encoding': 'unicode',
                'report_level': 4,
                # docutils settings meant for processing untrusted input
                'raw_enabled': False,
                'file_insertion_enabled': False,
            }

            parts = publish_parts(source=source,
                                  writer=RhodeCodeWriter(),
                                  settings_overrides=docutils_settings)
            rendered = parts["fragment"]
            if clean_html:
                rendered = cls.bleach_clean(rendered)
            return parts['html_title'] + rendered
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallbacking to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def jupyter(cls, source, safe=True):
        """
        Render jupyter notebook json into html together with inlined css and
        MathJax setup.

        :param source: notebook content, read with nbformat as version 4
        :param safe: replace javascript outputs with a placeholder text and
            sanitize markdown cells with bleach
        """
        from rhodecode.lib import helpers

        from traitlets.config import Config
        import nbformat
        from nbconvert import HTMLExporter
        from nbconvert.preprocessors import Preprocessor

        class CustomHTMLExporter(HTMLExporter):
            def _template_file_default(self):
                return 'basic'

        class Sandbox(Preprocessor):

            def preprocess(self, nb, resources):
                sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                if not safe:
                    # nothing gets sandboxed in unsafe mode
                    return nb, resources

                for cell in nb['cells']:
                    if 'outputs' in cell:
                        for cell_output in cell['outputs']:
                            if 'data' not in cell_output:
                                continue
                            output_data = cell_output['data']
                            if 'application/javascript' in output_data:
                                output_data['text/plain'] = sandbox_text
                                output_data.pop('application/javascript', None)

                    if 'source' in cell and cell['cell_type'] == 'markdown':
                        # sanitize similar like in markdown
                        cell['source'] = cls.bleach_clean(cell['source'])

                return nb, resources

        def _sanitize_resources(input_resources):
            """
            Skip/sanitize some of the CSS generated and included in jupyter
            so it doesn't mess up the UI so much
            """

            # TODO(marcink): probably we should replace this with whole custom
            # CSS set that doesn't screw up, but jupyter generated html has some
            # special markers, so it requires Custom HTML exporter template with
            # _default_template_path_default, to achieve that

            # strip the reset CSS, but only if the marker is there, find()
            # gives -1 otherwise and slicing with it keeps just the last char
            if input_resources:
                marker_pos = input_resources[0].find('/*! Source')
                if marker_pos != -1:
                    input_resources[0] = input_resources[0][marker_pos:]
            return input_resources

        def as_html(notebook):
            conf = Config()
            conf.CustomHTMLExporter.preprocessors = [Sandbox]
            html_exporter = CustomHTMLExporter(config=conf)

            (body, resources) = html_exporter.from_notebook_node(notebook)
            header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
            js = MakoTemplate(r'''
            <!-- Load mathjax -->
                <!-- MathJax configuration -->
                <script type="text/x-mathjax-config">
                MathJax.Hub.Config({
                    jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                    extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                    TeX: {
                        extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                    },
                    tex2jax: {
                        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                        processEscapes: true,
                        processEnvironments: true
                    },
                    // Center justify equations in code and markdown cells. Elsewhere
                    // we use CSS to left justify single line equations in code cells.
                    displayAlign: 'center',
                    "HTML-CSS": {
                        styles: {'.MathJax_Display': {"margin": 0}},
                        linebreaks: { automatic: true },
                        availableFonts: ["STIX", "TeX"]
                    },
                    showMathMenu: false
                });
                </script>
                <!-- End of mathjax configuration -->
                <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
            ''').render(h=helpers)

            css = '<style>{}</style>'.format(
                ''.join(_sanitize_resources(resources['inlining']['css'])))

            body = '\n'.join([header, css, js, body])
            return body, resources

        notebook = nbformat.reads(source, as_version=4)
        (body, resources) = as_html(notebook)
        return body
540
542
541
543
class RstTemplateRenderer(object):
    """
    Renders mako templates stored in the `templates/rst_templates` directory
    """

    def __init__(self):
        # templates dir is resolved relative to the parent of the directory
        # this module is in
        base = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
        rst_template_dirs = [os.path.join(base, 'templates', 'rst_templates')]
        self.template_store = TemplateLookup(
            directories=rst_template_dirs,
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """
        Look up a template by name in the template store
        """
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """
        Render template of given name, passing `kwargs` as its context

        :param template_name: name of the template to look up
        """
        template = self._get_template(template_name)
        return template.render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now