##// END OF EJS Templates
markdown-renderer: use lazy loaded markdown renderers initialization....
marcink -
r3239:1b708774 default
parent child Browse files
Show More
@@ -1,534 +1,557 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2018 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Renderer for markup languages with ability to parse using rst or markdown
24 24 """
25 25
26 26 import re
27 27 import os
28 28 import lxml
29 29 import logging
30 30 import urlparse
31 31 import bleach
32 32
33 33 from mako.lookup import TemplateLookup
34 34 from mako.template import Template as MakoTemplate
35 35
36 36 from docutils.core import publish_parts
37 37 from docutils.parsers.rst import directives
38 38 from docutils import writers
39 39 from docutils.writers import html4css1
40 40 import markdown
41 41
42 42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
43 from rhodecode.lib.utils2 import (
44 safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)
43 from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX)
45 44
46 45 log = logging.getLogger(__name__)
47 46
48 47 # default renderer used to generate automated comments
49 48 DEFAULT_COMMENTS_RENDERER = 'rst'
50 49
51 50
class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    Custom HTML Translator used for sandboxing potential
    JS injections in ref links
    """

    # Whitespace and ASCII control characters are removed from the scheme
    # before it is compared. URL parsers in browsers discard tabs/newlines
    # and leading control characters, so `java\tscript:` must be treated
    # the same as `javascript:`.
    _SCHEME_NOISE = re.compile(r'[\x00-\x20]+')

    def visit_reference(self, node):
        """
        Replace any `javascript:` reference target with a harmless
        placeholder, then delegate to the stock html4css1 translator.

        :param node: docutils reference node; only its `refuri` attribute
            is inspected and possibly rewritten
        """
        if 'refuri' in node.attributes:
            refuri = node['refuri']
            if ':' in refuri:
                prefix = refuri.split(':', 1)[0]
                scheme = self._SCHEME_NOISE.sub('', prefix or '').lower()

                if scheme == 'javascript':
                    # we don't allow javascript type of refs...
                    node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # old style class requires this...
        return html4css1.HTMLTranslator.visit_reference(self, node)
71 70
72 71
class RhodeCodeWriter(writers.html4css1.Writer):
    """
    html4css1 writer whose documents are translated by CustomHTMLTranslator
    """

    def __init__(self):
        # explicit base-class call instead of super(); presumably needed
        # because the docutils classes are old-style on py2 -- TODO confirm
        writers.Writer.__init__(self)
        # use the translator that sandboxes javascript: reference targets
        self.translator_class = CustomHTMLTranslator
77 76
78 77
def relative_links(html_source, server_paths):
    """
    Rewrite relative `src`/`href` attributes found in rendered html.

    `img`/`video` sources are resolved against server_paths['raw'].
    Anchors (except those with the `gfm` class) are resolved against
    server_paths['raw'] when the link ends with `?raw=1`, and against
    server_paths['standard'] otherwise.

    :param html_source: rendered html; returned untouched when empty,
        when lxml is missing or when the html cannot be parsed
    :param server_paths: dict with `raw` and `standard` request paths
    :returns: serialized html with rewritten links
    """
    if not html_source:
        return html_source

    try:
        from lxml.html import fromstring, tostring
    except ImportError:
        log.exception('Failed to import lxml')
        return html_source

    try:
        tree = fromstring(html_source)
    except Exception:
        return html_source

    for media in tree.cssselect('img, video'):
        media_src = media.attrib.get('src')
        if not media_src:
            continue
        media.attrib['src'] = relative_path(media_src, server_paths['raw'])

    for anchor in tree.cssselect('a:not(.gfm)'):
        href = anchor.attrib.get('href')
        if not href:
            continue
        # links explicitly asking for raw content resolve to the raw view
        path_kind = 'raw' if href.endswith('?raw=1') else 'standard'
        anchor.attrib['href'] = relative_path(href, server_paths[path_kind])

    return tostring(tree)
110 109
111 110
def relative_path(path, request_path, is_repo_file=None):
    """
    relative link support, path is a rel path, and request_path is current
    server path (not absolute)

    e.g.

    path = '../logo.png'
    request_path= '/repo/files/path/file.md'
    produces: '/repo/files/logo.png'

    :param path: link target taken from the rendered markup
    :param request_path: server path of the document being rendered
    :param is_repo_file: optional callable taking `request_path` and
        returning True when it points at a file; defaults to a check that
        always returns True
    :returns: `path` unchanged for anchors, `data:`/`javascript:` links and
        any link that carries a scheme or network location, otherwise a
        server path starting with `/`
    """
    # TODO(marcink): unicode/str support ?
    # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))

    def dummy_check(p):
        return True  # assume default is a valid file path

    is_repo_file = is_repo_file or dummy_check
    if not path:
        return request_path

    path = safe_unicode(path)
    request_path = safe_unicode(request_path)

    if path.startswith((u'data:', u'javascript:', u'#', u':')):
        # skip data, anchor, invalid links
        return path

    # A link with a scheme (mailto:, tel:, ...) or a network location
    # (http://host, //host) is not relative to the repository and must
    # not be rewritten.
    parsed = urlparse.urlparse(path)
    if parsed.scheme or parsed.netloc:
        return path

    if not request_path:
        return path

    if path.startswith(u'/'):
        path = path[1:]

    if path.startswith(u'./'):
        path = path[2:]

    parts = request_path.split('/')
    # compute how deep we need to traverse the request_path
    depth = 0

    if is_repo_file(request_path):
        # if request path is a VALID file, we use a relative path with
        # one level up
        depth += 1

    while path.startswith(u'../'):
        depth += 1
        path = path[3:]

    if depth > 0:
        parts = parts[:-depth]

    parts.append(path)
    final_path = u'/'.join(parts).lstrip(u'/')

    return u'/' + final_path
173 172
174 173
# lazily built plain markdown renderers, keyed by (extensions, output_format)
_cached_markdown_renderer = {}


def get_markdown_renderer(extensions, output_format):
    """
    Return a shared `markdown.Markdown` instance for the given configuration.

    The instance is created on first use and cached per
    (extensions, output_format) pair, so a different configuration never
    receives a renderer that was built for another one.

    :param extensions: iterable of markdown extensions (names or instances)
    :param output_format: output format name handed to `markdown.Markdown`
    """
    cache_key = (tuple(extensions), output_format)
    renderer = _cached_markdown_renderer.get(cache_key)

    if renderer is None:
        renderer = markdown.Markdown(
            extensions=list(extensions),
            enable_attributes=False, output_format=output_format)
        _cached_markdown_renderer[cache_key] = renderer
    return renderer
185
186
# lazily built github-flavored renderers, keyed by (extensions, output_format)
_cached_markdown_renderer_flavored = {}


def get_markdown_renderer_flavored(extensions, output_format):
    """
    Return a shared github-flavored `markdown.Markdown` instance.

    Works like a plain markdown renderer with GithubFlavoredMarkdownExtension
    appended to the given extensions. Instances are created on first use and
    cached per (extensions, output_format) pair.

    :param extensions: iterable of markdown extensions (names or instances)
    :param output_format: output format name handed to `markdown.Markdown`
    """
    cache_key = (tuple(extensions), output_format)
    renderer = _cached_markdown_renderer_flavored.get(cache_key)

    if renderer is None:
        renderer = markdown.Markdown(
            extensions=list(extensions) + [GithubFlavoredMarkdownExtension()],
            enable_attributes=False, output_format=output_format)
        _cached_markdown_renderer_flavored[cache_key] = renderer
    return renderer
198
199
class MarkupRenderer(object):
    """
    Renders markup sources (markdown, rst, jupyter notebooks, plain text)
    into html. The renderer is selected from the file name.
    """
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    extensions = ['markdown.extensions.codehilite', 'markdown.extensions.extra',
                  'markdown.extensions.def_list', 'markdown.extensions.sane_lists']

    output_format = 'html4'

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        runs detection of what renderer should be used for generating html
        from a markup language

        :param source: markup source (not inspected by the detection)
        :param filename: name of the rendered file; a missing name falls
            back to the plain renderer
        :returns: the matching renderer classmethod of MarkupRenderer
        """
        # `render` passes filename=None by default, regexes need a string
        filename = filename or ''

        if MarkupRenderer.MARKDOWN_PAT.findall(filename):
            detected_renderer = 'markdown'
        elif MarkupRenderer.RST_PAT.findall(filename):
            detected_renderer = 'rst'
        elif MarkupRenderer.JUPYTER_PAT.findall(filename):
            detected_renderer = 'jupyter'
        else:
            # readme files without extension and everything unknown
            detected_renderer = 'plain'

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def bleach_clean(cls, text):
        """
        Sanitize html with bleach using the markdown tag/attribute whitelist.

        :param text: html to sanitize
        :returns: sanitized html, or the literal `UNPARSEABLE TEXT` when
            bleach fails on the input
        """
        from .bleach_whitelist import markdown_attrs, markdown_tags
        allowed_tags = markdown_tags
        allowed_attrs = markdown_attrs

        try:
            return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
        except Exception:
            # keep the best-effort placeholder, but leave a trace of the cause
            log.exception('Failed to sanitize html using bleach')
            return 'UNPARSEABLE TEXT'

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.
        Returns None when no renderer can be detected.

        :param filename: file name to inspect
        :param exclude: list/tuple of extensions to ignore; any other value
            disables the filtering
        """
        def _filter(elements):
            if isinstance(exclude, (list, tuple)):
                return [x for x in elements if x not in exclude]
            return elements

        markdown_exts = _filter([ext for ext, _ in cls.MARKDOWN_EXTS if ext])
        if filename.endswith(tuple(markdown_exts)):
            return 'markdown'

        rst_exts = _filter([ext for ext, _ in cls.RST_EXTS if ext])
        if filename.endswith(tuple(rst_exts)):
            return 'rst'

        return None

    def render(self, source, filename=None):
        """
        Renders a given source using the renderer detected from the file
        name. Unknown or missing file names are rendered by the plain
        renderer, which replaces new lines with <br />

        :param source: markup source to render
        :param filename: name of the file the source comes from
        """
        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown

        :param text: markdown source to pre-process
        """

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            digest = md5_safe(matchobj.group(0))
            extractions[digest] = matchobj.group(0)
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            return '\n\n' + extractions[matchobj.group(1)]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def urlify_text(cls, text):
        """
        Wrap every http(s) url matched by URL_PAT in an html anchor.
        """
        def url_func(match_obj):
            url_full = match_obj.groups()[0]
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

        return cls.URL_PAT.sub(url_func, text)

    @classmethod
    def _highlight_mentions(cls, source):
        """
        Emphasise every match of MENTIONS_REGEX as ` **@name** `.
        """
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}
        return mention_pat.sub(wrapp, source).strip()

    @classmethod
    def plain(cls, source, universal_newline=True, leading_newline=True):
        """
        Render source as plain text: urls become links and new lines
        become <br />

        :param universal_newline: normalise all line endings to `\\n` first
        :param leading_newline: prefix the output with a <br />
        """
        source = safe_unicode(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        rendered_source = cls.urlify_text(source)
        source = ''
        if leading_newline:
            source += '<br />'
        source += rendered_source.replace("\n", '<br />')
        return source

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False,
                 clean_html=True):
        """
        returns markdown rendered code cleaned by the bleach library

        :param source: markdown source
        :param safe: on rendering errors fall back to the plain renderer
            instead of raising
        :param flavored: use the github flavored renderer and pre-processing
        :param mentions: emphasise @mentions before rendering
        :param clean_html: sanitize the rendered html with bleach
        """
        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false;
            # clean_html is forwarded so the caller's choice is respected
            return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                mentions=False, clean_html=clean_html)

        if flavored:
            markdown_renderer = get_markdown_renderer_flavored(
                cls.extensions, cls.output_format)
        else:
            markdown_renderer = get_markdown_renderer(
                cls.extensions, cls.output_format)

        source = safe_unicode(source)

        try:
            if flavored:
                source = cls._flavored_markdown(source)
            # the renderer instance is shared between calls, reset() drops
            # state (e.g. footnotes/references) left by a previous document
            rendered = markdown_renderer.reset().convert(source)
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                rendered = cls.plain(source)
            else:
                raise

        if clean_html:
            rendered = cls.bleach_clean(rendered)
        return rendered

    @classmethod
    def rst(cls, source, safe=True, mentions=False, clean_html=False):
        """
        returns rst rendered code, with the disallowed directives disabled

        :param source: rst source
        :param safe: on rendering errors fall back to the plain renderer
            instead of raising
        :param mentions: emphasise @mentions before rendering
        :param clean_html: sanitize the rendered fragment with bleach
        """
        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false;
            # clean_html is forwarded so the caller's choice is respected
            return cls.rst(mention_hl, safe=safe, mentions=False,
                           clean_html=clean_html)

        source = safe_unicode(source)
        try:
            # registering None under a directive name disables the directive
            for directive_name in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(directive_name, None)

            docutils_settings = {
                'input_encoding': 'unicode', 'report_level': 4}

            parts = publish_parts(source=source,
                                  writer=RhodeCodeWriter(),
                                  settings_overrides=docutils_settings)
            rendered = parts["fragment"]
            if clean_html:
                rendered = cls.bleach_clean(rendered)
            return parts['html_title'] + rendered
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallbacking to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def jupyter(cls, source, safe=True):
        """
        returns html rendered from a jupyter notebook source

        :param source: notebook json, parsed as nbformat version 4
        :param safe: replace javascript outputs with a placeholder and
            sanitize markdown cells before the export
        """
        from rhodecode.lib import helpers

        from traitlets.config import Config
        import nbformat
        from nbconvert import HTMLExporter
        from nbconvert.preprocessors import Preprocessor

        class CustomHTMLExporter(HTMLExporter):
            def _template_file_default(self):
                return 'basic'

        class Sandbox(Preprocessor):

            def preprocess(self, nb, resources):
                sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                if not safe:
                    # nothing is sandboxed in unsafe mode
                    return nb, resources

                for cell in nb['cells']:
                    if 'outputs' in cell:
                        for cell_output in cell['outputs']:
                            if 'data' in cell_output:
                                if 'application/javascript' in cell_output['data']:
                                    cell_output['data']['text/plain'] = sandbox_text
                                    cell_output['data'].pop('application/javascript', None)

                    if 'source' in cell and cell['cell_type'] == 'markdown':
                        # sanitize similar like in markdown
                        cell['source'] = cls.bleach_clean(cell['source'])

                return nb, resources

        def _sanitize_resources(input_resources):
            """
            Skip/sanitize some of the CSS generated and included in jupyter
            so it doesn't messes up UI so much
            """

            # TODO(marcink): probably we should replace this with whole custom
            # CSS set that doesn't screw up, but jupyter generated html has some
            # special markers, so it requires Custom HTML exporter template with
            # _default_template_path_default, to achieve that

            if not input_resources:
                return input_resources

            # strip the reset CSS; when the marker is missing the CSS is kept
            # as is instead of being cut down to its last character
            marker_pos = input_resources[0].find('/*! Source')
            if marker_pos >= 0:
                input_resources[0] = input_resources[0][marker_pos:]
            return input_resources

        def as_html(notebook):
            conf = Config()
            conf.CustomHTMLExporter.preprocessors = [Sandbox]
            html_exporter = CustomHTMLExporter(config=conf)

            (body, resources) = html_exporter.from_notebook_node(notebook)
            header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
            js = MakoTemplate(r'''
            <!-- Load mathjax -->
                <!-- MathJax configuration -->
                <script type="text/x-mathjax-config">
                MathJax.Hub.Config({
                    jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                    extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                    TeX: {
                        extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                    },
                    tex2jax: {
                        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                        processEscapes: true,
                        processEnvironments: true
                    },
                    // Center justify equations in code and markdown cells. Elsewhere
                    // we use CSS to left justify single line equations in code cells.
                    displayAlign: 'center',
                    "HTML-CSS": {
                        styles: {'.MathJax_Display': {"margin": 0}},
                        linebreaks: { automatic: true },
                        availableFonts: ["STIX", "TeX"]
                    },
                    showMathMenu: false
                });
                </script>
                <!-- End of mathjax configuration -->
                <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
            ''').render(h=helpers)

            css = '<style>{}</style>'.format(
                ''.join(_sanitize_resources(resources['inlining']['css'])))

            body = '\n'.join([header, css, js, body])
            return body, resources

        notebook = nbformat.reads(source, as_version=4)
        (body, resources) = as_html(notebook)
        return body
517 540
518 541
class RstTemplateRenderer(object):
    """
    Renders mako templates stored in the `templates/rst_templates`
    directory located one level above this module's package directory.
    """

    def __init__(self):
        module_dir = os.path.dirname(__file__)
        base = os.path.abspath(os.path.dirname(module_dir))
        template_dir = os.path.join(base, 'templates', 'rst_templates')
        self.template_store = TemplateLookup(
            directories=[template_dir],
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """Look the template up in the template store."""
        store = self.template_store
        return store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """Render the named template with kwargs as its context."""
        return self._get_template(template_name).render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now