##// END OF EJS Templates
libs: new markdown renderers for python3
super-admin -
r5079:bc1e432b default
parent child Browse files
Show More
@@ -24,6 +24,8 b' import xml.etree.ElementTree as etree'
24 24 from markdown.extensions import Extension
25 25 from markdown.extensions.fenced_code import FencedCodeExtension
26 26 from markdown.extensions.tables import TableExtension
27 from markdown.extensions.nl2br import Nl2BrExtension as _Nl2BrExtension
28 from markdown.extensions.wikilinks import WikiLinkExtension
27 29 from markdown.inlinepatterns import Pattern
28 30
29 31 import gfm
@@ -87,53 +89,8 b' class SubstituteTagInlineProcessor(Simpl'
87 89 return etree.Element(self.tag), m.start(0), m.end(0)
88 90
89 91
90 class Nl2BrExtension(Extension):
91 BR_RE = r'\n'
92
93 def extendMarkdown(self, md, md_globals):
94 br_tag = SubstituteTagInlineProcessor(self.BR_RE, 'br')
95 md.inlinePatterns.add('nl', br_tag, '_end')
96
97
98 class GithubFlavoredMarkdownExtension(Extension):
99 """
100 An extension that is as compatible as possible with GitHub-flavored
101 Markdown (GFM).
102
103 This extension aims to be compatible with the variant of GFM that GitHub
104 uses for Markdown-formatted gists and files (including READMEs). This
105 variant seems to have all the extensions described in the `GFM
106 documentation`_, except:
107
108 - Newlines in paragraphs are not transformed into ``br`` tags.
109 - Intra-GitHub links to commits, repositories, and issues are not
110 supported.
111
112 If you need support for features specific to GitHub comments and issues,
113 please use :class:`mdx_gfm.GithubFlavoredMarkdownExtension`.
114
115 .. _GFM documentation: https://guides.github.com/features/mastering-markdown/
116 """
117
118 def extendMarkdown(self, md, md_globals):
119 # Built-in extensions
120 Nl2BrExtension().extendMarkdown(md, md_globals)
121 FencedCodeExtension().extendMarkdown(md, md_globals)
122 TableExtension().extendMarkdown(md, md_globals)
123
124 # Custom extensions
125 gfm.AutolinkExtension().extendMarkdown(md, md_globals)
126 gfm.AutomailExtension().extendMarkdown(md, md_globals)
127 gfm.HiddenHiliteExtension([
128 ('guess_lang', 'False'),
129 ('css_class', 'highlight')
130 ]).extendMarkdown(md, md_globals)
131 gfm.SemiSaneListExtension().extendMarkdown(md, md_globals)
132 gfm.SpacedLinkExtension().extendMarkdown(md, md_globals)
133 gfm.StrikethroughExtension().extendMarkdown(md, md_globals)
134 gfm.TaskListExtension([
135 ('list_attrs', {'class': 'checkbox'})
136 ]).extendMarkdown(md, md_globals)
92 class Nl2BrExtension(_Nl2BrExtension):
93 pass
137 94
138 95
139 96 # Global Vars
@@ -167,9 +124,9 b' class UrlizePattern(markdown.inlinepatte'
167 124 return el
168 125
169 126
170 class UrlizeExtension(markdown.Extension):
127 class UrlizeExtension(Extension):
171 128 """ Urlize Extension for Python-Markdown. """
172 129
173 def extendMarkdown(self, md, md_globals):
130 def extendMarkdown(self, md):
174 131 """ Replace autolink with UrlizePattern """
175 132 md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md)
@@ -29,6 +29,7 b' import lxml'
29 29 import logging
30 30 import urllib.parse
31 31 import bleach
32 import pycmarkgfm
32 33
33 34 from mako.lookup import TemplateLookup
34 35 from mako.template import Template as MakoTemplate
@@ -39,8 +40,7 b' from docutils import writers'
39 40 from docutils.writers import html4css1
40 41 import markdown
41 42
42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
43 from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX)
43 from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX
44 44
45 45 log = logging.getLogger(__name__)
46 46
@@ -81,7 +81,7 b' class CustomHTMLTranslator(writers.html4'
81 81
82 82 class RhodeCodeWriter(writers.html4css1.Writer):
83 83 def __init__(self):
84 writers.Writer.__init__(self)
84 super(RhodeCodeWriter, self).__init__()
85 85 self.translator_class = CustomHTMLTranslator
86 86
87 87
@@ -111,7 +111,7 b' def relative_links(html_source, server_p'
111 111 else:
112 112 el.attrib['href'] = relative_path(src, server_paths['standard'])
113 113
114 return lxml.html.tostring(doc)
114 return lxml.html.tostring(doc, encoding='unicode')
115 115
116 116
117 117 def relative_path(path, request_path, is_repo_file=None):
@@ -126,7 +126,7 b' def relative_path(path, request_path, is'
126 126 produces: '/repo/files/logo.png'
127 127 """
128 128 # TODO(marcink): unicode/str support ?
129 # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))
129 # maybe=> safe_str(urllib.quote(safe_str(final_path), '/:'))
130 130
131 131 def dummy_check(p):
132 132 return True # assume default is a valid file path
@@ -135,8 +135,8 b' def relative_path(path, request_path, is'
135 135 if not path:
136 136 return request_path
137 137
138 path = safe_unicode(path)
139 request_path = safe_unicode(request_path)
138 path = safe_str(path)
139 request_path = safe_str(request_path)
140 140
141 141 if path.startswith(('data:', 'javascript:', '#', ':')):
142 142 # skip data, anchor, invalid links
@@ -185,22 +185,25 b' def get_markdown_renderer(extensions, ou'
185 185
186 186 if _cached_markdown_renderer is None:
187 187 _cached_markdown_renderer = markdown.Markdown(
188 extensions=extensions,
189 enable_attributes=False, output_format=output_format)
188 extensions=extensions + ['legacy_attrs'],
189 output_format=output_format)
190 190 return _cached_markdown_renderer
191 191
192 192
193 _cached_markdown_renderer_flavored = None
194
193 def get_markdown_renderer_flavored(extensions, output_format):
194 """
195 Dummy wrapper to mimic markdown API and render github HTML rendered
195 196
196 def get_markdown_renderer_flavored(extensions, output_format):
197 global _cached_markdown_renderer_flavored
197 """
198 md = get_markdown_renderer(extensions, output_format)
198 199
199 if _cached_markdown_renderer_flavored is None:
200 _cached_markdown_renderer_flavored = markdown.Markdown(
201 extensions=extensions + [GithubFlavoredMarkdownExtension()],
202 enable_attributes=False, output_format=output_format)
203 return _cached_markdown_renderer_flavored
200 class GFM(object):
201 def convert(self, source):
202 with pycmarkgfm.parse_gfm(source) as document:
203 parsed_md = document.to_commonmark()
204 return md.convert(parsed_md)
205
206 return GFM()
204 207
205 208
206 209 class MarkupRenderer(object):
@@ -267,7 +270,10 b' class MarkupRenderer(object):'
267 270 return getattr(MarkupRenderer, detected_renderer)
268 271
269 272 @classmethod
270 def bleach_clean(cls, text):
273 def sanitize_html(cls, text):
274 # TODO: replace this with https://nh3.readthedocs.io/en/latest
275 # bleach is abandoned and deprecated :/
276
271 277 from .bleach_whitelist import markdown_attrs, markdown_tags
272 278 allowed_tags = markdown_tags
273 279 allowed_attrs = markdown_attrs
@@ -275,7 +281,7 b' class MarkupRenderer(object):'
275 281 try:
276 282 return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
277 283 except Exception:
278 return 'UNPARSEABLE TEXT'
284 return 'TEXT CANNOT BE PARSED USING SANITIZE'
279 285
280 286 @classmethod
281 287 def renderer_from_filename(cls, filename, exclude):
@@ -302,9 +308,6 b' class MarkupRenderer(object):'
302 308 Renders a given filename using detected renderer
303 309 it detects renderers based on file extension or mimetype.
304 310 At last it will just do a simple html replacing new lines with <br/>
305
306 :param file_name:
307 :param source:
308 311 """
309 312
310 313 renderer = self._detect_renderer(source, filename)
@@ -312,44 +315,10 b' class MarkupRenderer(object):'
312 315 return readme_data
313 316
314 317 @classmethod
315 def _flavored_markdown(cls, text):
316 """
317 Github style flavored markdown
318
319 :param text:
320 """
321
322 # Extract pre blocks.
323 extractions = {}
324
325 def pre_extraction_callback(matchobj):
326 digest = md5_safe(matchobj.group(0))
327 extractions[digest] = matchobj.group(0)
328 return "{gfm-extraction-%s}" % digest
329 pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
330 text = re.sub(pattern, pre_extraction_callback, text)
331
332 # Prevent foo_bar_baz from ending up with an italic word in the middle.
333 def italic_callback(matchobj):
334 s = matchobj.group(0)
335 if list(s).count('_') >= 2:
336 return s.replace('_', r'\_')
337 return s
338 text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
339
340 # Insert pre block extractions.
341 def pre_insert_callback(matchobj):
342 return '\n\n' + extractions[matchobj.group(1)]
343 text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
344 pre_insert_callback, text)
345
346 return text
347
348 @classmethod
349 318 def urlify_text(cls, text):
350 319 def url_func(match_obj):
351 320 url_full = match_obj.groups()[0]
352 return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
321 return f'<a href="{url_full}">{url_full}</a>'
353 322
354 323 return cls.URL_PAT.sub(url_func, text)
355 324
@@ -375,7 +344,7 b' class MarkupRenderer(object):'
375 344
376 345 @classmethod
377 346 def plain(cls, source, universal_newline=True, leading_newline=True):
378 source = safe_unicode(source)
347 source = safe_str(source)
379 348 if universal_newline:
380 349 newline = '\n'
381 350 source = newline.join(source.splitlines())
@@ -386,7 +355,7 b' class MarkupRenderer(object):'
386 355 source += '<br />'
387 356 source += rendered_source.replace("\n", '<br />')
388 357
389 rendered = cls.bleach_clean(source)
358 rendered = cls.sanitize_html(source)
390 359 return rendered
391 360
392 361 @classmethod
@@ -409,12 +378,9 b' class MarkupRenderer(object):'
409 378 return cls.markdown(mention_hl, safe=safe, flavored=flavored,
410 379 mentions=False)
411 380
412 source = safe_unicode(source)
381 try:
382 rendered = markdown_renderer.convert(source)
413 383
414 try:
415 if flavored:
416 source = cls._flavored_markdown(source)
417 rendered = markdown_renderer.convert(source)
418 384 except Exception:
419 385 log.exception('Error when rendering Markdown')
420 386 if safe:
@@ -424,17 +390,18 b' class MarkupRenderer(object):'
424 390 raise
425 391
426 392 if clean_html:
427 rendered = cls.bleach_clean(rendered)
393 rendered = cls.sanitize_html(rendered)
428 394 return rendered
429 395
430 396 @classmethod
431 397 def rst(cls, source, safe=True, mentions=False, clean_html=False):
398
432 399 if mentions:
433 400 mention_hl = cls.convert_mentions(source, mode='rst')
434 401 # we extracted mentions render with this using Mentions false
435 402 return cls.rst(mention_hl, safe=safe, mentions=False)
436 403
437 source = safe_unicode(source)
404 source = safe_str(source)
438 405 try:
439 406 docutils_settings = dict(
440 407 [(alias, None) for alias in
@@ -446,7 +413,7 b' class MarkupRenderer(object):'
446 413 'syntax_highlight': 'short',
447 414 })
448 415
449 for k, v in docutils_settings.items():
416 for k, v in list(docutils_settings.items()):
450 417 directives.register_directive(k, v)
451 418
452 419 parts = publish_parts(source=source,
@@ -454,7 +421,7 b' class MarkupRenderer(object):'
454 421 settings_overrides=docutils_settings)
455 422 rendered = parts["fragment"]
456 423 if clean_html:
457 rendered = cls.bleach_clean(rendered)
424 rendered = cls.sanitize_html(rendered)
458 425 return parts['html_title'] + rendered
459 426 except Exception:
460 427 log.exception('Error when rendering RST')
@@ -494,7 +461,7 b' class MarkupRenderer(object):'
494 461
495 462 if 'source' in cell and cell['cell_type'] == 'markdown':
496 463 # sanitize similar like in markdown
497 cell['source'] = cls.bleach_clean(cell['source'])
464 cell['source'] = cls.sanitize_html(cell['source'])
498 465
499 466 return nb, resources
500 467
General Comments 0
You need to be logged in to leave comments. Login now