##// END OF EJS Templates
libs: new markdown renderers for python3
super-admin -
r5079:bc1e432b default
parent child Browse files
Show More
@@ -24,6 +24,8 b' import xml.etree.ElementTree as etree'
24 from markdown.extensions import Extension
24 from markdown.extensions import Extension
25 from markdown.extensions.fenced_code import FencedCodeExtension
25 from markdown.extensions.fenced_code import FencedCodeExtension
26 from markdown.extensions.tables import TableExtension
26 from markdown.extensions.tables import TableExtension
27 from markdown.extensions.nl2br import Nl2BrExtension as _Nl2BrExtension
28 from markdown.extensions.wikilinks import WikiLinkExtension
27 from markdown.inlinepatterns import Pattern
29 from markdown.inlinepatterns import Pattern
28
30
29 import gfm
31 import gfm
@@ -87,53 +89,8 b' class SubstituteTagInlineProcessor(Simpl'
87 return etree.Element(self.tag), m.start(0), m.end(0)
89 return etree.Element(self.tag), m.start(0), m.end(0)
88
90
89
91
90 class Nl2BrExtension(Extension):
92 class Nl2BrExtension(_Nl2BrExtension):
91 BR_RE = r'\n'
93 pass
92
93 def extendMarkdown(self, md, md_globals):
94 br_tag = SubstituteTagInlineProcessor(self.BR_RE, 'br')
95 md.inlinePatterns.add('nl', br_tag, '_end')
96
97
98 class GithubFlavoredMarkdownExtension(Extension):
99 """
100 An extension that is as compatible as possible with GitHub-flavored
101 Markdown (GFM).
102
103 This extension aims to be compatible with the variant of GFM that GitHub
104 uses for Markdown-formatted gists and files (including READMEs). This
105 variant seems to have all the extensions described in the `GFM
106 documentation`_, except:
107
108 - Newlines in paragraphs are not transformed into ``br`` tags.
109 - Intra-GitHub links to commits, repositories, and issues are not
110 supported.
111
112 If you need support for features specific to GitHub comments and issues,
113 please use :class:`mdx_gfm.GithubFlavoredMarkdownExtension`.
114
115 .. _GFM documentation: https://guides.github.com/features/mastering-markdown/
116 """
117
118 def extendMarkdown(self, md, md_globals):
119 # Built-in extensions
120 Nl2BrExtension().extendMarkdown(md, md_globals)
121 FencedCodeExtension().extendMarkdown(md, md_globals)
122 TableExtension().extendMarkdown(md, md_globals)
123
124 # Custom extensions
125 gfm.AutolinkExtension().extendMarkdown(md, md_globals)
126 gfm.AutomailExtension().extendMarkdown(md, md_globals)
127 gfm.HiddenHiliteExtension([
128 ('guess_lang', 'False'),
129 ('css_class', 'highlight')
130 ]).extendMarkdown(md, md_globals)
131 gfm.SemiSaneListExtension().extendMarkdown(md, md_globals)
132 gfm.SpacedLinkExtension().extendMarkdown(md, md_globals)
133 gfm.StrikethroughExtension().extendMarkdown(md, md_globals)
134 gfm.TaskListExtension([
135 ('list_attrs', {'class': 'checkbox'})
136 ]).extendMarkdown(md, md_globals)
137
94
138
95
139 # Global Vars
96 # Global Vars
@@ -167,9 +124,9 b' class UrlizePattern(markdown.inlinepatte'
167 return el
124 return el
168
125
169
126
170 class UrlizeExtension(markdown.Extension):
127 class UrlizeExtension(Extension):
171 """ Urlize Extension for Python-Markdown. """
128 """ Urlize Extension for Python-Markdown. """
172
129
173 def extendMarkdown(self, md, md_globals):
130 def extendMarkdown(self, md):
174 """ Replace autolink with UrlizePattern """
131 """ Replace autolink with UrlizePattern """
175 md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md)
132 md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md)
@@ -29,6 +29,7 b' import lxml'
29 import logging
29 import logging
30 import urllib.parse
30 import urllib.parse
31 import bleach
31 import bleach
32 import pycmarkgfm
32
33
33 from mako.lookup import TemplateLookup
34 from mako.lookup import TemplateLookup
34 from mako.template import Template as MakoTemplate
35 from mako.template import Template as MakoTemplate
@@ -39,8 +40,7 b' from docutils import writers'
39 from docutils.writers import html4css1
40 from docutils.writers import html4css1
40 import markdown
41 import markdown
41
42
42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
43 from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX
43 from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX)
44
44
45 log = logging.getLogger(__name__)
45 log = logging.getLogger(__name__)
46
46
@@ -81,7 +81,7 b' class CustomHTMLTranslator(writers.html4'
81
81
82 class RhodeCodeWriter(writers.html4css1.Writer):
82 class RhodeCodeWriter(writers.html4css1.Writer):
83 def __init__(self):
83 def __init__(self):
84 writers.Writer.__init__(self)
84 super(RhodeCodeWriter, self).__init__()
85 self.translator_class = CustomHTMLTranslator
85 self.translator_class = CustomHTMLTranslator
86
86
87
87
@@ -111,7 +111,7 b' def relative_links(html_source, server_p'
111 else:
111 else:
112 el.attrib['href'] = relative_path(src, server_paths['standard'])
112 el.attrib['href'] = relative_path(src, server_paths['standard'])
113
113
114 return lxml.html.tostring(doc)
114 return lxml.html.tostring(doc, encoding='unicode')
115
115
116
116
117 def relative_path(path, request_path, is_repo_file=None):
117 def relative_path(path, request_path, is_repo_file=None):
@@ -126,7 +126,7 b' def relative_path(path, request_path, is'
126 produces: '/repo/files/logo.png'
126 produces: '/repo/files/logo.png'
127 """
127 """
128 # TODO(marcink): unicode/str support ?
128 # TODO(marcink): unicode/str support ?
129 # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))
129 # maybe=> safe_str(urllib.quote(safe_str(final_path), '/:'))
130
130
131 def dummy_check(p):
131 def dummy_check(p):
132 return True # assume default is a valid file path
132 return True # assume default is a valid file path
@@ -135,8 +135,8 b' def relative_path(path, request_path, is'
135 if not path:
135 if not path:
136 return request_path
136 return request_path
137
137
138 path = safe_unicode(path)
138 path = safe_str(path)
139 request_path = safe_unicode(request_path)
139 request_path = safe_str(request_path)
140
140
141 if path.startswith(('data:', 'javascript:', '#', ':')):
141 if path.startswith(('data:', 'javascript:', '#', ':')):
142 # skip data, anchor, invalid links
142 # skip data, anchor, invalid links
@@ -185,22 +185,25 b' def get_markdown_renderer(extensions, ou'
185
185
186 if _cached_markdown_renderer is None:
186 if _cached_markdown_renderer is None:
187 _cached_markdown_renderer = markdown.Markdown(
187 _cached_markdown_renderer = markdown.Markdown(
188 extensions=extensions,
188 extensions=extensions + ['legacy_attrs'],
189 enable_attributes=False, output_format=output_format)
189 output_format=output_format)
190 return _cached_markdown_renderer
190 return _cached_markdown_renderer
191
191
192
192
193 _cached_markdown_renderer_flavored = None
193 def get_markdown_renderer_flavored(extensions, output_format):
194
194 """
195 Dummy wrapper that mimics the markdown API and renders GitHub-flavored Markdown to HTML
195
196
196 def get_markdown_renderer_flavored(extensions, output_format):
197 """
197 global _cached_markdown_renderer_flavored
198 md = get_markdown_renderer(extensions, output_format)
198
199
199 if _cached_markdown_renderer_flavored is None:
200 class GFM(object):
200 _cached_markdown_renderer_flavored = markdown.Markdown(
201 def convert(self, source):
201 extensions=extensions + [GithubFlavoredMarkdownExtension()],
202 with pycmarkgfm.parse_gfm(source) as document:
202 enable_attributes=False, output_format=output_format)
203 parsed_md = document.to_commonmark()
203 return _cached_markdown_renderer_flavored
204 return md.convert(parsed_md)
205
206 return GFM()
204
207
205
208
206 class MarkupRenderer(object):
209 class MarkupRenderer(object):
@@ -267,7 +270,10 b' class MarkupRenderer(object):'
267 return getattr(MarkupRenderer, detected_renderer)
270 return getattr(MarkupRenderer, detected_renderer)
268
271
269 @classmethod
272 @classmethod
270 def bleach_clean(cls, text):
273 def sanitize_html(cls, text):
274 # TODO: replace this with https://nh3.readthedocs.io/en/latest
275 # bleach is abandoned and deprecated :/
276
271 from .bleach_whitelist import markdown_attrs, markdown_tags
277 from .bleach_whitelist import markdown_attrs, markdown_tags
272 allowed_tags = markdown_tags
278 allowed_tags = markdown_tags
273 allowed_attrs = markdown_attrs
279 allowed_attrs = markdown_attrs
@@ -275,7 +281,7 b' class MarkupRenderer(object):'
275 try:
281 try:
276 return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
282 return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
277 except Exception:
283 except Exception:
278 return 'UNPARSEABLE TEXT'
284 return 'TEXT CANNOT BE PARSED USING SANITIZE'
279
285
280 @classmethod
286 @classmethod
281 def renderer_from_filename(cls, filename, exclude):
287 def renderer_from_filename(cls, filename, exclude):
@@ -302,9 +308,6 b' class MarkupRenderer(object):'
302 Renders a given filename using detected renderer
308 Renders a given filename using detected renderer
303 it detects renderers based on file extension or mimetype.
309 it detects renderers based on file extension or mimetype.
304 At last it will just do a simple html replacing new lines with <br/>
310 At last it will just do a simple html replacing new lines with <br/>
305
306 :param file_name:
307 :param source:
308 """
311 """
309
312
310 renderer = self._detect_renderer(source, filename)
313 renderer = self._detect_renderer(source, filename)
@@ -312,44 +315,10 b' class MarkupRenderer(object):'
312 return readme_data
315 return readme_data
313
316
314 @classmethod
317 @classmethod
315 def _flavored_markdown(cls, text):
316 """
317 Github style flavored markdown
318
319 :param text:
320 """
321
322 # Extract pre blocks.
323 extractions = {}
324
325 def pre_extraction_callback(matchobj):
326 digest = md5_safe(matchobj.group(0))
327 extractions[digest] = matchobj.group(0)
328 return "{gfm-extraction-%s}" % digest
329 pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
330 text = re.sub(pattern, pre_extraction_callback, text)
331
332 # Prevent foo_bar_baz from ending up with an italic word in the middle.
333 def italic_callback(matchobj):
334 s = matchobj.group(0)
335 if list(s).count('_') >= 2:
336 return s.replace('_', r'\_')
337 return s
338 text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
339
340 # Insert pre block extractions.
341 def pre_insert_callback(matchobj):
342 return '\n\n' + extractions[matchobj.group(1)]
343 text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
344 pre_insert_callback, text)
345
346 return text
347
348 @classmethod
349 def urlify_text(cls, text):
318 def urlify_text(cls, text):
350 def url_func(match_obj):
319 def url_func(match_obj):
351 url_full = match_obj.groups()[0]
320 url_full = match_obj.groups()[0]
352 return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
321 return f'<a href="{url_full}">{url_full}</a>'
353
322
354 return cls.URL_PAT.sub(url_func, text)
323 return cls.URL_PAT.sub(url_func, text)
355
324
@@ -375,7 +344,7 b' class MarkupRenderer(object):'
375
344
376 @classmethod
345 @classmethod
377 def plain(cls, source, universal_newline=True, leading_newline=True):
346 def plain(cls, source, universal_newline=True, leading_newline=True):
378 source = safe_unicode(source)
347 source = safe_str(source)
379 if universal_newline:
348 if universal_newline:
380 newline = '\n'
349 newline = '\n'
381 source = newline.join(source.splitlines())
350 source = newline.join(source.splitlines())
@@ -386,7 +355,7 b' class MarkupRenderer(object):'
386 source += '<br />'
355 source += '<br />'
387 source += rendered_source.replace("\n", '<br />')
356 source += rendered_source.replace("\n", '<br />')
388
357
389 rendered = cls.bleach_clean(source)
358 rendered = cls.sanitize_html(source)
390 return rendered
359 return rendered
391
360
392 @classmethod
361 @classmethod
@@ -409,12 +378,9 b' class MarkupRenderer(object):'
409 return cls.markdown(mention_hl, safe=safe, flavored=flavored,
378 return cls.markdown(mention_hl, safe=safe, flavored=flavored,
410 mentions=False)
379 mentions=False)
411
380
412 source = safe_unicode(source)
381 try:
382 rendered = markdown_renderer.convert(source)
413
383
414 try:
415 if flavored:
416 source = cls._flavored_markdown(source)
417 rendered = markdown_renderer.convert(source)
418 except Exception:
384 except Exception:
419 log.exception('Error when rendering Markdown')
385 log.exception('Error when rendering Markdown')
420 if safe:
386 if safe:
@@ -424,17 +390,18 b' class MarkupRenderer(object):'
424 raise
390 raise
425
391
426 if clean_html:
392 if clean_html:
427 rendered = cls.bleach_clean(rendered)
393 rendered = cls.sanitize_html(rendered)
428 return rendered
394 return rendered
429
395
430 @classmethod
396 @classmethod
431 def rst(cls, source, safe=True, mentions=False, clean_html=False):
397 def rst(cls, source, safe=True, mentions=False, clean_html=False):
398
432 if mentions:
399 if mentions:
433 mention_hl = cls.convert_mentions(source, mode='rst')
400 mention_hl = cls.convert_mentions(source, mode='rst')
434 # we extracted mentions render with this using Mentions false
401 # we extracted mentions render with this using Mentions false
435 return cls.rst(mention_hl, safe=safe, mentions=False)
402 return cls.rst(mention_hl, safe=safe, mentions=False)
436
403
437 source = safe_unicode(source)
404 source = safe_str(source)
438 try:
405 try:
439 docutils_settings = dict(
406 docutils_settings = dict(
440 [(alias, None) for alias in
407 [(alias, None) for alias in
@@ -446,7 +413,7 b' class MarkupRenderer(object):'
446 'syntax_highlight': 'short',
413 'syntax_highlight': 'short',
447 })
414 })
448
415
449 for k, v in docutils_settings.items():
416 for k, v in list(docutils_settings.items()):
450 directives.register_directive(k, v)
417 directives.register_directive(k, v)
451
418
452 parts = publish_parts(source=source,
419 parts = publish_parts(source=source,
@@ -454,7 +421,7 b' class MarkupRenderer(object):'
454 settings_overrides=docutils_settings)
421 settings_overrides=docutils_settings)
455 rendered = parts["fragment"]
422 rendered = parts["fragment"]
456 if clean_html:
423 if clean_html:
457 rendered = cls.bleach_clean(rendered)
424 rendered = cls.sanitize_html(rendered)
458 return parts['html_title'] + rendered
425 return parts['html_title'] + rendered
459 except Exception:
426 except Exception:
460 log.exception('Error when rendering RST')
427 log.exception('Error when rendering RST')
@@ -494,7 +461,7 b' class MarkupRenderer(object):'
494
461
495 if 'source' in cell and cell['cell_type'] == 'markdown':
462 if 'source' in cell and cell['cell_type'] == 'markdown':
496 # sanitize similar like in markdown
463 # sanitize similar like in markdown
497 cell['source'] = cls.bleach_clean(cell['source'])
464 cell['source'] = cls.sanitize_html(cell['source'])
498
465
499 return nb, resources
466 return nb, resources
500
467
General Comments 0
You need to be logged in to leave comments. Login now