##// END OF EJS Templates
jupyter-rendering: added a custom preprocessor to implement Javascript object...
marcink -
r1495:4a485783 default
parent child Browse files
Show More
@@ -1,359 +1,375 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Renderers for markup languages: reStructuredText, Markdown, Jupyter notebooks and plain text
24 24 """
25 25
26 26 import re
27 27 import os
28 28 import logging
29 29 import itertools
30 30
31 31 from mako.lookup import TemplateLookup
32 32 from mako.template import Template as MakoTemplate
33 33
34 34 from docutils.core import publish_parts
35 35 from docutils.parsers.rst import directives
36 36 import markdown
37 37
38 38 from rhodecode.lib.markdown_ext import (
39 39 UrlizeExtension, GithubFlavoredMarkdownExtension)
40 40 from rhodecode.lib.utils2 import safe_unicode, md5_safe, MENTIONS_REGEX
41 41
42 42 log = logging.getLogger(__name__)  # module-level logger, named after this module
43 43
44 44 # name of the renderer used by default to generate automated comments
45 45 DEFAULT_COMMENTS_RENDERER = 'rst'
46 46
47 47
class MarkupRenderer(object):
    """
    Renders markup sources (Markdown, reStructuredText, Jupyter notebooks and
    plain text) into HTML.

    :meth:`render` selects a renderer from the file name; every renderer is
    also available directly as a classmethod (``markdown``, ``rst``,
    ``jupyter`` and ``plain``).
    """

    # directives that get registered as ``None`` with docutils, which makes
    # them unknown (and thus unusable) inside rendered documents
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    # pattern used by `urlify_text` to find http(s) URLs. Note that `$-_`
    # inside the character class is a *range* (0x24-0x5F) and is what makes
    # characters such as `/`, `:`, `=` and `?` part of a matched URL.
    URL_PAT = re.compile(
        r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
        r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
    markdown_renderer = markdown.Markdown(
        extensions, safe_mode=True, enable_attributes=False)

    markdown_renderer_flavored = markdown.Markdown(
        extensions + [GithubFlavoredMarkdownExtension()], safe_mode=True,
        enable_attributes=False)

    # (extension, weight) pairs, a lower weight comes first. The consumer of
    # the weights is not in this module; presumably they order the candidate
    # readme file names.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        Runs detection of what renderer should be used for generating html
        from a markup language, based on the extension of `filename`.

        :param source: markup source, not used by the detection
        :param filename: name of the rendered file; ``None`` or an empty
            name selects the plain renderer
        :return: one of the ``markdown``, ``rst``, ``jupyter`` or ``plain``
            classmethods of :class:`MarkupRenderer`
        """
        # `render()` defaults filename to None and regex matching raises
        # TypeError for None, so normalize it to an empty name first
        filename = filename or ''

        renderers_by_pattern = (
            (MarkupRenderer.MARKDOWN_PAT, 'markdown'),
            (MarkupRenderer.RST_PAT, 'rst'),
            (MarkupRenderer.JUPYTER_PAT, 'jupyter'),
        )

        # everything that isn't recognized (README without extension
        # included) is rendered as plain text
        detected_renderer = 'plain'
        for pattern, renderer_name in renderers_by_pattern:
            if pattern.search(filename):
                detected_renderer = renderer_name
                break

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.

        :param filename: file name to check
        :param exclude: list or tuple of extensions that should be ignored;
            any other value disables the filtering
        :return: ``'markdown'``, ``'rst'`` or ``None`` when no renderer can
            be detected
        """
        def _filter(elements):
            if isinstance(exclude, (list, tuple)):
                return [x for x in elements if x not in exclude]
            return elements

        candidates = (
            ('markdown', cls.MARKDOWN_EXTS),
            ('rst', cls.RST_EXTS),
        )
        for renderer_name, weighted_exts in candidates:
            known_exts = [ext for ext, _weight in weighted_exts if ext]
            if filename.endswith(tuple(_filter(known_exts))):
                return renderer_name

        return None

    def render(self, source, filename=None):
        """
        Renders given source using a renderer detected from the file name.
        Unknown (or missing) file names are rendered by the plain renderer,
        which only replaces new lines with <br />.

        :param source: markup source to render
        :param filename: name of the file the source comes from
        :return: rendered html
        """
        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown pre-processing.

        :param text: markdown source
        :return: source with underscores escaped and `<pre>` blocks untouched
        """

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            digest = md5_safe(matchobj.group(0))
            extractions[digest] = matchobj.group(0)
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        # NOTE(review): the pattern is applied without re.MULTILINE, so `^`
        # anchors only at the very start of the text. Kept as is, escaping on
        # every line would also alter lines inside of fenced code blocks.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            digest = matchobj.group(1)
            if digest not in extractions:
                # placeholder-looking text that was already part of the
                # input, leave it as it is instead of raising KeyError
                return matchobj.group(0)
            return '\n\n' + extractions[digest]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def urlify_text(cls, text):
        """
        Wraps each http(s) URL found in the text in an <a href> tag.

        NOTE(review): neither the URL nor the surrounding text is
        HTML-escaped here, confirm that callers escape or trust the text.

        :param text: text to process
        :return: text with URLs replaced by links
        """
        def url_func(match_obj):
            url_full = match_obj.groups()[0]
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

        return cls.URL_PAT.sub(url_func, text)

    @classmethod
    def plain(cls, source, universal_newline=True):
        """
        Renders source as plain text: URLs become links and new lines are
        replaced with <br />.

        :param source: text to render
        :param universal_newline: normalize all line endings to `\\n` first
        :return: rendered html, always prefixed with a <br />
        """
        source = safe_unicode(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        source = cls.urlify_text(source)
        return '<br />' + source.replace("\n", '<br />')

    @classmethod
    def _highlight_mentions(cls, source):
        """
        Wraps each @mention found in the source in `**` markers, which is
        strong emphasis in both markdown and rst.

        :param source: markup source
        :return: stripped source with mentions highlighted
        """
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}

        return mention_pat.sub(wrapp, source).strip()

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False):
        """
        Renders markdown source into html.

        :param source: markdown source
        :param safe: on rendering errors fall back to the plain renderer
            instead of raising
        :param flavored: use the Github flavored variant of markdown
        :param mentions: highlight @mentions before rendering
        :return: rendered html
        """
        # It does not allow to insert inline HTML. In presence of HTML tags, it
        # will replace them instead with [HTML_REMOVED]. This is controlled by
        # the safe_mode=True parameter of the markdown method.

        if flavored:
            markdown_renderer = cls.markdown_renderer_flavored
        else:
            markdown_renderer = cls.markdown_renderer

        if mentions:
            source = cls._highlight_mentions(source)

        source = safe_unicode(source)
        try:
            if flavored:
                source = cls._flavored_markdown(source)
            # the Markdown instances are shared by all calls, reset them so
            # state of a previously converted document doesn't leak in here
            markdown_renderer.reset()
            return markdown_renderer.convert(source)
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def rst(cls, source, safe=True, mentions=False):
        """
        Renders reStructuredText source into html.

        :param source: rst source
        :param safe: on rendering errors fall back to the plain renderer
            instead of raising
        :param mentions: highlight @mentions before rendering
        :return: rendered html, document title followed by the body fragment
        """
        if mentions:
            source = cls._highlight_mentions(source)

        source = safe_unicode(source)
        try:
            # a directive registered as None is unknown to the parser
            for directive_name in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(directive_name, None)

            docutils_settings = {
                'input_encoding': 'unicode',
                'report_level': 4,
                # rendered documents must not read local files or inject
                # raw output, also via directives other than the ones
                # disallowed above (e.g. `csv-table` with `:file:`)
                'file_insertion_enabled': False,
                'raw_enabled': False,
            }

            parts = publish_parts(source=source,
                                  writer_name="html4css1",
                                  settings_overrides=docutils_settings)

            return parts['html_title'] + parts["fragment"]
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallbacking to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def jupyter(cls, source, safe=True):
        """
        Renders a Jupyter notebook into html using nbconvert.

        :param source: notebook source (JSON), read as nbformat version 4
        :param safe: replace `application/javascript` cell outputs with
            a text placeholder instead of rendering them
        :return: rendered html with the notebook CSS and MathJax setup
            prepended
        """
        from rhodecode.lib import helpers

        from traitlets.config import Config
        import nbformat
        from nbconvert import HTMLExporter
        from nbconvert.preprocessors import Preprocessor

        class CustomHTMLExporter(HTMLExporter):
            def _template_file_default(self):
                return 'basic'

        class Sandbox(Preprocessor):
            """
            Swaps javascript cell outputs for a plain text placeholder
            """

            def preprocess(self, nb, resources):
                sandbox_text = (
                    'SandBoxed(IPython.core.display.Javascript object)')
                if not safe:
                    return nb, resources

                for cell in nb['cells']:
                    if 'outputs' not in cell:
                        continue
                    for cell_output in cell['outputs']:
                        if 'data' not in cell_output:
                            continue
                        output_data = cell_output['data']
                        if 'application/javascript' in output_data:
                            output_data['text/plain'] = sandbox_text
                            output_data.pop('application/javascript', None)
                return nb, resources

        def _sanitize_resources(resources):
            """
            Skip/sanitize some of the CSS generated and included in jupyter
            so it doesn't messes up UI so much
            """

            # TODO(marcink): probably we should replace this with whole custom
            # CSS set that doesn't screw up, but jupyter generated html has some
            # special markers, so it requires Custom HTML exporter template with
            # _default_template_path_default, to achieve that

            if not resources:
                return resources

            # strip the reset CSS, but only when the marker is really there;
            # find() gives -1 otherwise and slicing from -1 would drop
            # everything except of the last character
            marker_pos = resources[0].find('/*! Source')
            if marker_pos != -1:
                resources[0] = resources[0][marker_pos:]
            return resources

        def as_html(notebook):
            conf = Config()
            conf.CustomHTMLExporter.preprocessors = [Sandbox]
            html_exporter = CustomHTMLExporter(config=conf)

            (body, resources) = html_exporter.from_notebook_node(notebook)
            header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
            js = MakoTemplate(r'''
            <!-- Load mathjax -->
                <!-- MathJax configuration -->
                <script type="text/x-mathjax-config">
                MathJax.Hub.Config({
                    jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                    extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                    TeX: {
                        extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                    },
                    tex2jax: {
                        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                        processEscapes: true,
                        processEnvironments: true
                    },
                    // Center justify equations in code and markdown cells. Elsewhere
                    // we use CSS to left justify single line equations in code cells.
                    displayAlign: 'center',
                    "HTML-CSS": {
                        styles: {'.MathJax_Display': {"margin": 0}},
                        linebreaks: { automatic: true },
                        availableFonts: ["STIX", "TeX"]
                    },
                    showMathMenu: false
                });
                </script>
                <!-- End of mathjax configuration -->
                <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
            ''').render(h=helpers)

            css = '<style>{}</style>'.format(
                ''.join(_sanitize_resources(resources['inlining']['css'])))

            body = '\n'.join([header, css, js, body])
            return body, resources

        notebook = nbformat.reads(source, as_version=4)
        (body, resources) = as_html(notebook)
        return body
342 358
343 359
class RstTemplateRenderer(object):
    """
    Renders Mako templates stored in the `templates/rst_templates` directory,
    located one level above the directory of this module.
    """

    def __init__(self):
        module_dir = os.path.dirname(__file__)
        base = os.path.abspath(os.path.dirname(module_dir))
        templates_dir = os.path.join(base, 'templates', 'rst_templates')

        # templates get rhodecode helpers available under the `h` name
        self.template_store = TemplateLookup(
            directories=[templates_dir],
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """
        Looks up a template in the template store.

        :param templatename: name of the template to load
        """
        store = self.template_store
        return store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """
        Renders the named template, keyword arguments are passed to the
        template as its context.

        :param template_name: name of the template to render
        """
        return self._get_template(template_name).render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now