##// END OF EJS Templates
markup-renderers: fixed code highlight for rst
marcink -
r4117:81d225a3 default
parent child Browse files
Show More
@@ -1,559 +1,564 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2019 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Renderer for markup languages with ability to parse using rst or markdown
24 24 """
25 25
26 26 import re
27 27 import os
28 28 import lxml
29 29 import logging
30 30 import urlparse
31 31 import bleach
32 32
33 33 from mako.lookup import TemplateLookup
34 34 from mako.template import Template as MakoTemplate
35 35
36 36 from docutils.core import publish_parts
37 37 from docutils.parsers.rst import directives
38 38 from docutils import writers
39 39 from docutils.writers import html4css1
40 40 import markdown
41 41
42 42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
43 43 from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX)
44 44
45 45 log = logging.getLogger(__name__)
46 46
47 47 # default renderer used to generate automated comments
48 48 DEFAULT_COMMENTS_RENDERER = 'rst'
49 49
50 50
class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    Custom HTML Translator used for sandboxing potential
    JS injections in ref links
    """

    def visit_literal_block(self, node):
        """Open a literal block as ``<pre>`` with an extra ``codehilite`` class."""
        self.body.append(
            self.starttag(node, 'pre', CLASS='codehilite literal-block'))

    def visit_reference(self, node):
        """
        Neutralise ``javascript:`` reference URIs, then delegate to the
        stock html4css1 translator.

        :param node: docutils reference node; its ``refuri`` attribute is
            replaced in place when it uses the javascript scheme
        """
        if 'refuri' in node.attributes:
            refuri = node['refuri']
            if ':' in refuri:
                prefix = refuri.split(':', 1)[0]
                # drop whitespace and control characters before comparing, so
                # an obfuscated scheme such as "java\tscript:" is still caught
                prefix = re.sub(r'[\s\x00-\x1f]+', '', prefix)

                if prefix.lower() == 'javascript':
                    # we don't allow javascript type of refs...
                    node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # old style class requires this...
        return html4css1.HTMLTranslator.visit_reference(self, node)
70 72
71 73
class RhodeCodeWriter(writers.html4css1.Writer):
    # docutils writer based on html4css1 that translates documents with
    # CustomHTMLTranslator
    def __init__(self):
        # initialise through the generic docutils Writer base class
        writers.Writer.__init__(self)
        # plug in the translator that sandboxes javascript reference links
        self.translator_class = CustomHTMLTranslator
76 78
77 79
def relative_links(html_source, server_paths):
    """
    Rewrite ``src``/``href`` attributes of `html_source` using `relative_path`.

    ``src`` of ``img``/``video`` elements is resolved against
    ``server_paths['raw']``. ``href`` of ``a`` elements without the ``gfm``
    class is resolved against ``server_paths['raw']`` when it ends with
    ``?raw=1`` and against ``server_paths['standard']`` otherwise.

    :param html_source: HTML markup; a falsy value is returned unchanged
    :param server_paths: mapping providing the ``raw`` and ``standard`` paths
    :return: serialized document with rewritten links, or `html_source`
        unchanged when lxml cannot be imported or the markup cannot be parsed
    """
    if not html_source:
        return html_source

    try:
        from lxml.html import fromstring
        from lxml.html import tostring
    except ImportError:
        log.exception('Failed to import lxml')
        return html_source

    try:
        # use the names imported above instead of reaching for `lxml.html`
        # through the top-level package
        doc = fromstring(html_source)
    except Exception:
        # best effort: markup we cannot parse is returned untouched
        return html_source

    for el in doc.cssselect('img, video'):
        src = el.attrib.get('src')
        if src:
            el.attrib['src'] = relative_path(src, server_paths['raw'])

    for el in doc.cssselect('a:not(.gfm)'):
        href = el.attrib.get('href')
        if href:
            base = 'raw' if href.endswith('?raw=1') else 'standard'
            el.attrib['href'] = relative_path(href, server_paths[base])

    return tostring(doc)
109 111
110 112
def relative_path(path, request_path, is_repo_file=None):
    """
    relative link support, path is a rel path, and request_path is current
    server path (not absolute)

    e.g.

    path = '../logo.png'
    request_path= '/repo/files/path/file.md'
    produces: '/repo/files/logo.png'

    :param path: link target found in the rendered markup
    :param request_path: server path the link is resolved against
    :param is_repo_file: optional callable taking `request_path`; when it
        returns a true value the last segment of `request_path` is dropped
        before resolving. Defaults to a check that always returns True.
    :return: `request_path` for an empty `path`; `path` unchanged for data,
        javascript, mailto, tel, anchor, ``:``-prefixed and absolute links
        or when `request_path` is empty; otherwise the resolved path with a
        leading ``/``
    """
    # TODO(marcink): unicode/str support ?
    # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))

    def dummy_check(p):
        return True  # assume default is a valid file path

    is_repo_file = is_repo_file or dummy_check
    if not path:
        return request_path

    path = safe_unicode(path)
    request_path = safe_unicode(request_path)

    # mailto:/tel: carry no netloc, so without this guard they would be
    # turned into repository paths below
    if path.startswith(
            (u'data:', u'javascript:', u'mailto:', u'tel:', u'#', u':')):
        # skip data, scheme-only, anchor, invalid links
        return path

    is_absolute = bool(urlparse.urlparse(path).netloc)
    if is_absolute:
        return path

    if not request_path:
        return path

    if path.startswith(u'/'):
        path = path[1:]

    if path.startswith(u'./'):
        path = path[2:]

    parts = request_path.split('/')
    # compute how deep we need to traverse the request_path
    depth = 0

    if is_repo_file(request_path):
        # if request path is a VALID file, we use a relative path with
        # one level up
        depth += 1

    while path.startswith(u'../'):
        depth += 1
        path = path[3:]

    if depth > 0:
        parts = parts[:-depth]

    parts.append(path)
    final_path = u'/'.join(parts).lstrip(u'/')

    return u'/' + final_path
172 174
173 175
_cached_markdown_renderer = None
# arguments the cached renderer above was built with
_cached_markdown_renderer_key = None


def get_markdown_renderer(extensions, output_format):
    """
    Return a module-wide cached ``markdown.Markdown`` instance.

    The instance is rebuilt when the function is called with arguments that
    differ from the ones the cached instance was created with, so callers
    never receive a renderer configured for someone else.

    :param extensions: list of markdown extensions to enable
    :param output_format: output format passed to ``markdown.Markdown``
    """
    global _cached_markdown_renderer, _cached_markdown_renderer_key

    key = (tuple(extensions), output_format)
    if _cached_markdown_renderer is None \
            or _cached_markdown_renderer_key != key:
        _cached_markdown_renderer = markdown.Markdown(
            extensions=extensions,
            enable_attributes=False, output_format=output_format)
        _cached_markdown_renderer_key = key
    return _cached_markdown_renderer
185 187
186 188
_cached_markdown_renderer_flavored = None
# arguments the cached flavored renderer above was built with
_cached_markdown_renderer_flavored_key = None


def get_markdown_renderer_flavored(extensions, output_format):
    """
    Return a module-wide cached ``markdown.Markdown`` instance that has the
    ``GithubFlavoredMarkdownExtension`` enabled on top of `extensions`.

    The instance is rebuilt when the function is called with arguments that
    differ from the ones the cached instance was created with.

    :param extensions: list of markdown extensions to enable
    :param output_format: output format passed to ``markdown.Markdown``
    """
    global _cached_markdown_renderer_flavored
    global _cached_markdown_renderer_flavored_key

    key = (tuple(extensions), output_format)
    if _cached_markdown_renderer_flavored is None \
            or _cached_markdown_renderer_flavored_key != key:
        _cached_markdown_renderer_flavored = markdown.Markdown(
            extensions=extensions + [GithubFlavoredMarkdownExtension()],
            enable_attributes=False, output_format=output_format)
        _cached_markdown_renderer_flavored_key = key
    return _cached_markdown_renderer_flavored
198 200
199 201
class MarkupRenderer(object):
    """
    Renders markup sources (markdown, rst, jupyter notebooks, plain text)
    into HTML, picking the renderer from the file name.
    """
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    extensions = ['markdown.extensions.codehilite', 'markdown.extensions.extra',
                  'markdown.extensions.def_list', 'markdown.extensions.sane_lists']

    output_format = 'html4'

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        runs detection of what renderer should be used for generating html
        from a markup language

        :param source: markup source (not inspected by the detection)
        :param filename: name matched against the markdown/rst/jupyter
            patterns; ``None`` or any non-matching name selects ``plain``
        :return: the matching renderer method of ``MarkupRenderer``
        """
        # render() passes filename=None by default, regexes need a string
        filename = filename or ''

        if MarkupRenderer.MARKDOWN_PAT.findall(filename):
            detected_renderer = 'markdown'
        elif MarkupRenderer.RST_PAT.findall(filename):
            detected_renderer = 'rst'
        elif MarkupRenderer.JUPYTER_PAT.findall(filename):
            detected_renderer = 'jupyter'
        else:
            # covers names matching PLAIN_PAT as well as everything unknown
            detected_renderer = 'plain'

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def bleach_clean(cls, text):
        """
        Sanitize `text` with bleach using the markdown tag/attribute
        whitelists.

        :return: cleaned text, or the literal ``'UNPARSEABLE TEXT'`` when
            bleach raises
        """
        from .bleach_whitelist import markdown_attrs, markdown_tags
        allowed_tags = markdown_tags
        allowed_attrs = markdown_attrs

        try:
            return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
        except Exception:
            # keep the placeholder result, but leave a trace of the failure
            log.exception('Failed to sanitize text using bleach')
            return 'UNPARSEABLE TEXT'

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.
        Returns None when no renderer can be detected.

        :param filename: file name checked against known extensions
        :param exclude: list/tuple of extensions to ignore; any other value
            disables the filtering
        """
        def _filter(elements):
            if isinstance(exclude, (list, tuple)):
                return [x for x in elements if x not in exclude]
            return elements

        markdown_exts = _filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]])
        if filename.endswith(tuple(markdown_exts)):
            return 'markdown'

        rst_exts = _filter([x[0] for x in cls.RST_EXTS if x[0]])
        if filename.endswith(tuple(rst_exts)):
            return 'rst'

        return None

    def render(self, source, filename=None):
        """
        Renders `source` using the renderer detected from `filename`.
        Unknown or missing file names are rendered with the plain renderer.

        :param source: markup source to render
        :param filename: file name used to pick the renderer
        """
        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def _highlight_mentions(cls, source):
        """
        Wrap each match of MENTIONS_REGEX in `source` as `` **@name** `` and
        strip the result.
        """
        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}

        return re.compile(MENTIONS_REGEX).sub(wrapp, source).strip()

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown

        Escapes underscores of a ``foo_bar_baz`` style word matched by the
        pattern below, leaving ``<pre>`` blocks untouched.

        :param text: markdown source
        """

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            digest = md5_safe(matchobj.group(0))
            extractions[digest] = matchobj.group(0)
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        # NOTE(review): `^` is used without re.MULTILINE, so this only applies
        # to a word at the very start of the text - confirm if intended.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            return '\n\n' + extractions[matchobj.group(1)]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def urlify_text(cls, text):
        """Turn every URL_PAT match in `text` into an ``<a href>`` link."""
        def url_func(match_obj):
            url_full = match_obj.group(1)
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

        return cls.URL_PAT.sub(url_func, text)

    @classmethod
    def plain(cls, source, universal_newline=True, leading_newline=True):
        """
        Render `source` as plain text: links are urlified, newlines become
        ``<br />`` and the result is sanitized with bleach.

        :param universal_newline: normalize all line endings to ``\\n`` first
        :param leading_newline: prefix the output with a ``<br />``
        """
        source = safe_unicode(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        rendered_source = cls.urlify_text(source)
        source = ''
        if leading_newline:
            source += '<br />'
        source += rendered_source.replace("\n", '<br />')

        rendered = cls.bleach_clean(source)
        return rendered

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False,
                 clean_html=True):
        """
        returns markdown rendered code cleaned by the bleach library

        :param source: markdown source
        :param safe: on rendering errors fall back to plain rendering
            instead of re-raising
        :param flavored: use the github flavored renderer and preprocessing
        :param mentions: highlight @mentions before rendering
        :param clean_html: sanitize the rendered html with bleach
        """
        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false;
            # clean_html is forwarded so the caller's choice is honoured
            return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                mentions=False, clean_html=clean_html)

        if flavored:
            markdown_renderer = get_markdown_renderer_flavored(
                cls.extensions, cls.output_format)
        else:
            markdown_renderer = get_markdown_renderer(
                cls.extensions, cls.output_format)

        source = safe_unicode(source)

        try:
            if flavored:
                source = cls._flavored_markdown(source)
            # the renderer instance is shared between calls, clear state
            # left over from a previously converted document
            markdown_renderer.reset()
            rendered = markdown_renderer.convert(source)
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                rendered = cls.plain(source)
            else:
                raise

        if clean_html:
            rendered = cls.bleach_clean(rendered)
        return rendered

    @classmethod
    def rst(cls, source, safe=True, mentions=False, clean_html=False):
        """
        returns rst rendered as html (title + fragment)

        :param source: reStructuredText source
        :param safe: on rendering errors fall back to plain rendering
            instead of re-raising
        :param mentions: highlight @mentions before rendering
        :param clean_html: sanitize the rendered fragment with bleach
        """
        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false;
            # clean_html is forwarded so the caller's choice is honoured
            return cls.rst(mention_hl, safe=safe, mentions=False,
                           clean_html=clean_html)

        source = safe_unicode(source)
        try:
            # replace the disallowed directives with None in the docutils
            # directive registry
            for name in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(name, None)

            # only real docutils settings go in here; they must not be
            # registered as directives
            docutils_settings = {
                'input_encoding': 'unicode',
                'report_level': 4,
                'syntax_highlight': 'short',
            }

            parts = publish_parts(source=source,
                                  writer=RhodeCodeWriter(),
                                  settings_overrides=docutils_settings)
            rendered = parts["fragment"]
            if clean_html:
                rendered = cls.bleach_clean(rendered)
            return parts['html_title'] + rendered
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallbacking to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def jupyter(cls, source, safe=True):
        """
        Render a jupyter notebook given as `source` into html, prefixed with
        the ipython stylesheet link and MathJax configuration.

        :param source: notebook content, read with nbformat as version 4
        :param safe: replace javascript outputs with a placeholder text and
            sanitize markdown cells with bleach
        """
        from rhodecode.lib import helpers

        from traitlets.config import Config
        import nbformat
        from nbconvert import HTMLExporter
        from nbconvert.preprocessors import Preprocessor

        class CustomHTMLExporter(HTMLExporter):
            def _template_file_default(self):
                return 'basic'

        class Sandbox(Preprocessor):

            def preprocess(self, nb, resources):
                sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                if not safe:
                    # nothing gets sandboxed in unsafe mode
                    return nb, resources

                for cell in nb['cells']:
                    if 'outputs' in cell:
                        for cell_output in cell['outputs']:
                            if 'data' in cell_output:
                                if 'application/javascript' in cell_output['data']:
                                    cell_output['data']['text/plain'] = sandbox_text
                                    cell_output['data'].pop('application/javascript', None)

                    if 'source' in cell and cell['cell_type'] == 'markdown':
                        # sanitize similar like in markdown
                        cell['source'] = cls.bleach_clean(cell['source'])

                return nb, resources

        def _sanitize_resources(input_resources):
            """
            Skip/sanitize some of the CSS generated and included in jupyter
            so it doesn't messes up UI so much
            """

            # TODO(marcink): probably we should replace this with whole custom
            # CSS set that doesn't screw up, but jupyter generated html has some
            # special markers, so it requires Custom HTML exporter template with
            # _default_template_path_default, to achieve that

            # strip the reset CSS, only when the marker is actually present
            # (find() returns -1 otherwise, which would keep just one char)
            marker_pos = input_resources[0].find('/*! Source')
            if marker_pos != -1:
                input_resources[0] = input_resources[0][marker_pos:]
            return input_resources

        def as_html(notebook):
            conf = Config()
            conf.CustomHTMLExporter.preprocessors = [Sandbox]
            html_exporter = CustomHTMLExporter(config=conf)

            (body, resources) = html_exporter.from_notebook_node(notebook)
            header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
            js = MakoTemplate(r'''
            <!-- MathJax configuration -->
            <script type="text/x-mathjax-config">
            MathJax.Hub.Config({
                jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                TeX: {
                    extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                },
                tex2jax: {
                    inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                    displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                    processEscapes: true,
                    processEnvironments: true
                },
                // Center justify equations in code and markdown cells. Elsewhere
                // we use CSS to left justify single line equations in code cells.
                displayAlign: 'center',
                "HTML-CSS": {
                    styles: {'.MathJax_Display': {"margin": 0}},
                    linebreaks: { automatic: true },
                    availableFonts: ["STIX", "TeX"]
                },
                showMathMenu: false
            });
            </script>
            <!-- End of MathJax configuration -->
            <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
            ''').render(h=helpers)

            css = MakoTemplate(r'''
            <link rel="stylesheet" type="text/css" href="${h.asset('css/style-ipython.css', ver=ver)}" media="screen"/>
            ''').render(h=helpers, ver='ver1')

            body = '\n'.join([header, css, js, body])
            return body, resources

        notebook = nbformat.reads(source, as_version=4)
        (body, resources) = as_html(notebook)
        return body
542 547
543 548
class RstTemplateRenderer(object):
    """
    Renders Mako templates looked up in the ``templates/rst_templates``
    directory located one level above this module's directory.
    """

    def __init__(self):
        module_dir = os.path.dirname(__file__)
        package_dir = os.path.abspath(os.path.dirname(module_dir))
        templates_dir = os.path.join(package_dir, 'templates', 'rst_templates')
        # every template gets the helpers module imported as `h`
        self.template_store = TemplateLookup(
            directories=[templates_dir],
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """Fetch the template named `templatename` from the lookup."""
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """Render `template_name`, passing `kwargs` to the template."""
        return self._get_template(template_name).render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now