##// END OF EJS Templates
security: improve Javascript RST sandbox to also catch mixed case.
marcink -
r3147:7609f194 default
parent child Browse files
Show More
@@ -1,524 +1,526 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2018 RhodeCode GmbH
3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Renderer for markup languages with ability to parse using rst or markdown
23 Renderer for markup languages with ability to parse using rst or markdown
24 """
24 """
25
25
26 import re
26 import re
27 import os
27 import os
28 import lxml
28 import lxml
29 import logging
29 import logging
30 import urlparse
30 import urlparse
31 import bleach
31 import bleach
32
32
33 from mako.lookup import TemplateLookup
33 from mako.lookup import TemplateLookup
34 from mako.template import Template as MakoTemplate
34 from mako.template import Template as MakoTemplate
35
35
36 from docutils.core import publish_parts
36 from docutils.core import publish_parts
37 from docutils.parsers.rst import directives
37 from docutils.parsers.rst import directives
38 from docutils import writers
38 from docutils import writers
39 from docutils.writers import html4css1
39 from docutils.writers import html4css1
40 import markdown
40 import markdown
41
41
42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
43 from rhodecode.lib.utils2 import (
43 from rhodecode.lib.utils2 import (
44 safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)
44 safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)
45
45
46 log = logging.getLogger(__name__)
46 log = logging.getLogger(__name__)
47
47
48 # default renderer used to generate automated comments
48 # default renderer used to generate automated comments
49 DEFAULT_COMMENTS_RENDERER = 'rst'
49 DEFAULT_COMMENTS_RENDERER = 'rst'
50
50
51
51
class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    Custom HTML Translator used for sandboxing potential
    JS injections in ref links
    """

    # Whitespace and C0 control characters. Browsers drop leading ones and
    # strip tab/newline characters found anywhere in a URL before parsing it,
    # so they have to be removed before the scheme can be compared reliably.
    _SCHEME_NOISE = re.compile(r'[\x00-\x20]+')

    def visit_reference(self, node):
        """
        Neutralise ``javascript:`` targets before the reference is rendered.

        The scheme (the text before the first ``:``) is compared
        case-insensitively and with whitespace/control characters removed,
        so variants such as ``JavaScript:``, `` javascript :`` or
        ``java\\tscript:`` are all sandboxed.

        :param node: docutils reference node; its ``refuri`` attribute is
            overwritten in place when it points at a javascript URI.
        """
        if 'refuri' in node.attributes:
            refuri = node['refuri']
            if ':' in refuri:
                prefix = refuri.split(':', 1)[0]
                scheme = self._SCHEME_NOISE.sub('', prefix).lower()

                if scheme == 'javascript':
                    # we don't allow javascript type of refs...
                    node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # old style class requires this...
        return html4css1.HTMLTranslator.visit_reference(self, node)
69
71
70
72
class RhodeCodeWriter(writers.html4css1.Writer):
    """
    html4css1 writer that renders through :class:`CustomHTMLTranslator`,
    so javascript reference targets get sandboxed.
    """

    def __init__(self):
        # Initialise the generic docutils writer, then select our own
        # translator class instead of the stock HTMLTranslator.
        writers.Writer.__init__(self)
        self.translator_class = CustomHTMLTranslator
75
77
76
78
def relative_links(html_source, server_paths):
    """
    Rewrite ``src``/``href`` attributes of rendered HTML through
    :func:`relative_path`.

    ``img`` and ``video`` sources are always resolved against
    ``server_paths['raw']``. Anchors without the ``gfm`` class are resolved
    against ``server_paths['raw']`` when the href ends with ``?raw=1`` and
    against ``server_paths['standard']`` otherwise.

    :param html_source: rendered HTML; returned untouched when empty, when
        lxml cannot be imported, or when the HTML cannot be parsed.
    :param server_paths: mapping with ``'raw'`` and ``'standard'`` keys.
    :return: serialized HTML as produced by ``lxml.html.tostring``.
    """
    if not html_source:
        return html_source

    try:
        from lxml.html import fromstring
        from lxml.html import tostring
    except ImportError:
        log.exception('Failed to import lxml')
        return html_source

    try:
        doc = fromstring(html_source)
    except Exception:
        # unparsable markup, hand back what we received
        return html_source

    for el in doc.cssselect('img, video'):
        src = el.attrib.get('src')
        if src:
            el.attrib['src'] = relative_path(src, server_paths['raw'])

    for el in doc.cssselect('a:not(.gfm)'):
        src = el.attrib.get('href')
        if src:
            raw_mode = src.endswith('?raw=1')
            if raw_mode:
                el.attrib['href'] = relative_path(src, server_paths['raw'])
            else:
                el.attrib['href'] = relative_path(src, server_paths['standard'])

    return tostring(doc)
108
110
109
111
def relative_path(path, request_path, is_repo_file=None):
    """
    relative link support, path is a rel path, and request_path is current
    server path (not absolute)

    e.g.

    path = '../logo.png'
    request_path= '/repo/files/path/file.md'
    produces: '/repo/files/logo.png'

    :param path: link target found in the rendered document.
    :param request_path: server path of the document being rendered.
    :param is_repo_file: optional callable taking ``request_path`` and
        returning True when it points at a file; when omitted every
        ``request_path`` is treated as a file.
    :return: ``path`` unchanged for data/javascript/anchor/invalid links,
        for links with a network location and when ``request_path`` is
        empty; ``request_path`` when ``path`` is empty; otherwise the
        resolved path with a single leading ``/``.
    """
    # TODO(marcink): unicode/str support ?
    # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))

    def dummy_check(p):
        return True  # assume default is a valid file path

    is_repo_file = is_repo_file or dummy_check
    if not path:
        return request_path

    path = safe_unicode(path)
    request_path = safe_unicode(request_path)

    if path.startswith((u'data:', u'javascript:', u'#', u':')):
        # skip data, anchor, invalid links
        return path

    is_absolute = bool(urlparse.urlparse(path).netloc)
    if is_absolute:
        return path

    if not request_path:
        return path

    if path.startswith(u'/'):
        path = path[1:]

    if path.startswith(u'./'):
        path = path[2:]

    parts = request_path.split('/')
    # compute how deep we need to traverse the request_path
    depth = 0

    if is_repo_file(request_path):
        # if request path is a VALID file, we use a relative path with
        # one level up
        depth += 1

    # every leading '../' climbs one more level
    while path.startswith(u'../'):
        depth += 1
        path = path[3:]

    if depth > 0:
        parts = parts[:-depth]

    parts.append(path)
    final_path = u'/'.join(parts).lstrip(u'/')

    return u'/' + final_path
171
173
172
174
class MarkupRenderer(object):
    """
    Renders markup sources (reStructuredText, Markdown, Jupyter notebooks
    or plain text) into HTML.
    """
    # directives that get registered as ``None`` before rendering RST
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
    output_format = 'html4'
    markdown_renderer = markdown.Markdown(
        extensions, enable_attributes=False, output_format=output_format)

    markdown_renderer_flavored = markdown.Markdown(
        extensions + [GithubFlavoredMarkdownExtension()],
        enable_attributes=False, output_format=output_format)

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        runs detection of what renderer should be used for generating html
        from a markup language

        Detection is based on ``filename`` only; a missing filename falls
        back to the plain renderer.

        :param source: unused by the detection itself
        :param filename: name of the file being rendered
        :return: the matching renderer classmethod of ``MarkupRenderer``
        """
        # the default ``None`` would make the regex calls raise TypeError
        filename = filename or ''

        if MarkupRenderer.MARKDOWN_PAT.findall(filename):
            detected_renderer = 'markdown'
        elif MarkupRenderer.RST_PAT.findall(filename):
            detected_renderer = 'rst'
        elif MarkupRenderer.JUPYTER_PAT.findall(filename):
            detected_renderer = 'jupyter'
        elif MarkupRenderer.PLAIN_PAT.findall(filename):
            detected_renderer = 'plain'
        else:
            detected_renderer = 'plain'

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def bleach_clean(cls, text):
        """
        Sanitize HTML with bleach using the markdown tag/attribute
        whitelists; returns ``'UNPARSEABLE TEXT'`` when bleach fails.
        """
        from .bleach_whitelist import markdown_attrs, markdown_tags
        allowed_tags = markdown_tags
        allowed_attrs = markdown_attrs

        try:
            return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
        except Exception:
            return 'UNPARSEABLE TEXT'

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.
        Returns None when no renderer can be detected.
        """
        def _filter(elements):
            if isinstance(exclude, (list, tuple)):
                return [x for x in elements if x not in exclude]
            return elements

        if filename.endswith(
                tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):
            return 'markdown'
        if filename.endswith(tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):
            return 'rst'

        return None

    def render(self, source, filename=None):
        """
        Renders a given source using the renderer detected from filename.
        Unrecognised names are rendered with the plain renderer, which
        replaces new lines with <br />.

        :param source:
        :param filename:
        """

        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown

        :param text:
        """

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            digest = md5_safe(matchobj.group(0))
            extractions[digest] = matchobj.group(0)
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        # NOTE(review): without re.MULTILINE the ``^`` anchor only matches at
        # the very start of the text -- confirm whether that is intended.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            return '\n\n' + extractions[matchobj.group(1)]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def urlify_text(cls, text):
        """
        Wrap every match of ``URL_PAT`` in ``text`` with an ``<a>`` tag.
        """
        def url_func(match_obj):
            url_full = match_obj.groups()[0]
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

        return cls.URL_PAT.sub(url_func, text)

    @classmethod
    def plain(cls, source, universal_newline=True, leading_newline=True):
        """
        Render source as plain text: URLs become links and new lines
        become ``<br />``.

        :param universal_newline: normalise all line endings to ``\\n`` first
        :param leading_newline: prepend a ``<br />`` to the output
        """
        source = safe_unicode(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        rendered_source = cls.urlify_text(source)
        source = ''
        if leading_newline:
            source += '<br />'
        source += rendered_source.replace("\n", '<br />')
        return source

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False,
                 clean_html=True):
        """
        returns markdown rendered code cleaned by the bleach library

        :param safe: on rendering errors fall back to the plain renderer
            instead of re-raising
        :param flavored: use the github flavored renderer
        :param mentions: highlight mentions matched by ``MENTIONS_REGEX``
        :param clean_html: pass the rendered html through ``bleach_clean``
        """

        if flavored:
            markdown_renderer = cls.markdown_renderer_flavored
        else:
            markdown_renderer = cls.markdown_renderer

        if mentions:
            mention_pat = re.compile(MENTIONS_REGEX)

            def wrapp(match_obj):
                uname = match_obj.groups()[0]
                return ' **@%(uname)s** ' % {'uname': uname}
            mention_hl = mention_pat.sub(wrapp, source).strip()
            # we extracted mentions render with this using Mentions false;
            # clean_html is forwarded so the caller's choice is honoured
            return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                mentions=False, clean_html=clean_html)

        source = safe_unicode(source)

        try:
            if flavored:
                source = cls._flavored_markdown(source)
            rendered = markdown_renderer.convert(source)
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                rendered = cls.plain(source)
            else:
                raise

        if clean_html:
            rendered = cls.bleach_clean(rendered)
        return rendered

    @classmethod
    def rst(cls, source, safe=True, mentions=False, clean_html=False):
        """
        returns reStructuredText rendered to html

        :param safe: on rendering errors fall back to the plain renderer
            instead of re-raising
        :param mentions: highlight mentions matched by ``MENTIONS_REGEX``
        :param clean_html: pass the rendered fragment through
            ``bleach_clean``
        """
        if mentions:
            mention_pat = re.compile(MENTIONS_REGEX)

            def wrapp(match_obj):
                uname = match_obj.groups()[0]
                return ' **@%(uname)s** ' % {'uname': uname}
            mention_hl = mention_pat.sub(wrapp, source).strip()
            # we extracted mentions render with this using Mentions false;
            # clean_html is forwarded so the caller's choice is honoured
            return cls.rst(mention_hl, safe=safe, mentions=False,
                           clean_html=clean_html)

        source = safe_unicode(source)
        try:
            docutils_settings = dict(
                [(alias, None) for alias in
                 cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])

            docutils_settings.update({
                'input_encoding': 'unicode', 'report_level': 4})

            # only the disallowed directives are registered (as None);
            # the settings keys above are not directive names
            for alias in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(alias, None)

            parts = publish_parts(source=source,
                                  writer=RhodeCodeWriter(),
                                  settings_overrides=docutils_settings)
            rendered = parts["fragment"]
            if clean_html:
                rendered = cls.bleach_clean(rendered)
            return parts['html_title'] + rendered
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallbacking to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def jupyter(cls, source, safe=True):
        """
        Render a Jupyter notebook (format version 4) to html.

        :param source: notebook source as accepted by ``nbformat.reads``
        :param safe: replace ``application/javascript`` cell outputs with a
            plain-text placeholder
        """
        from rhodecode.lib import helpers

        from traitlets.config import Config
        import nbformat
        from nbconvert import HTMLExporter
        from nbconvert.preprocessors import Preprocessor

        class CustomHTMLExporter(HTMLExporter):
            def _template_file_default(self):
                return 'basic'

        class Sandbox(Preprocessor):

            def preprocess(self, nb, resources):
                sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                for cell in nb['cells']:
                    if safe and 'outputs' in cell:
                        for cell_output in cell['outputs']:
                            if 'data' in cell_output:
                                if 'application/javascript' in cell_output['data']:
                                    cell_output['data']['text/plain'] = sandbox_text
                                    cell_output['data'].pop('application/javascript', None)
                return nb, resources

        def _sanitize_resources(resources):
            """
            Skip/sanitize some of the CSS generated and included in jupyter
            so it doesn't messes up UI so much
            """

            # TODO(marcink): probably we should replace this with whole custom
            # CSS set that doesn't screw up, but jupyter generated html has some
            # special markers, so it requires Custom HTML exporter template with
            # _default_template_path_default, to achieve that

            # strip the reset CSS; when the marker is missing find() gives -1
            # and slicing with it would keep only the last character, so the
            # CSS is left untouched in that case
            if resources:
                marker_pos = resources[0].find('/*! Source')
                if marker_pos != -1:
                    resources[0] = resources[0][marker_pos:]
            return resources

        def as_html(notebook):
            conf = Config()
            conf.CustomHTMLExporter.preprocessors = [Sandbox]
            html_exporter = CustomHTMLExporter(config=conf)

            (body, resources) = html_exporter.from_notebook_node(notebook)
            header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
            js = MakoTemplate(r'''
            <!-- Load mathjax -->
            <!-- MathJax configuration -->
            <script type="text/x-mathjax-config">
            MathJax.Hub.Config({
                jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                TeX: {
                    extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                },
                tex2jax: {
                    inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                    displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                    processEscapes: true,
                    processEnvironments: true
                },
                // Center justify equations in code and markdown cells. Elsewhere
                // we use CSS to left justify single line equations in code cells.
                displayAlign: 'center',
                "HTML-CSS": {
                    styles: {'.MathJax_Display': {"margin": 0}},
                    linebreaks: { automatic: true },
                    availableFonts: ["STIX", "TeX"]
                },
                showMathMenu: false
            });
            </script>
            <!-- End of mathjax configuration -->
            <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
            ''').render(h=helpers)

            css = '<style>{}</style>'.format(
                ''.join(_sanitize_resources(resources['inlining']['css'])))

            body = '\n'.join([header, css, js, body])
            return body, resources

        notebook = nbformat.reads(source, as_version=4)
        (body, resources) = as_html(notebook)
        return body
507
509
508
510
class RstTemplateRenderer(object):
    """
    Renders Mako templates found in the ``templates/rst_templates``
    directory located one level above this module's directory.
    """

    def __init__(self):
        base = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
        rst_template_dirs = [os.path.join(base, 'templates', 'rst_templates')]
        self.template_store = TemplateLookup(
            directories=rst_template_dirs,
            input_encoding='utf-8',
            # makes ``h`` available inside every template
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """Look up ``templatename`` in the template store."""
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """
        Render ``template_name``, passing ``kwargs`` as template variables.
        """
        template = self._get_template(template_name)
        return template.render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now