rhodecode-enterprise-ce Commit - r5079:bc1e432b

libs: new markdown renderers for python3

super-admin -

r5079:bc1e432b default

parent child

rhodecode/lib/markdown_ext.py

0 +6 -49

              from markdown.extensions import Extension
              from markdown.extensions.fenced_code import FencedCodeExtension
              from markdown.extensions.tables import TableExtension
+             from markdown.extensions.nl2br import Nl2BrExtension as _Nl2BrExtension
+             from markdown.extensions.wikilinks import WikiLinkExtension
              from markdown.inlinepatterns import Pattern
              import gfm
                      return etree.Element(self.tag), m.start(0), m.end(0)
-             class Nl2BrExtension(Extension):
-                 BR_RE = r'\n'
-                 def extendMarkdown(self, md, md_globals):
-                     br_tag = SubstituteTagInlineProcessor(self.BR_RE, 'br')
-                     md.inlinePatterns.add('nl', br_tag, '_end')
-             class GithubFlavoredMarkdownExtension(Extension):
-                 """
-                 An extension that is as compatible as possible with GitHub-flavored
-                 Markdown (GFM).
-                 This extension aims to be compatible with the variant of GFM that GitHub
-                 uses for Markdown-formatted gists and files (including READMEs). This
-                 variant seems to have all the extensions described in the `GFM
-                 documentation`_, except:
-                 - Newlines in paragraphs are not transformed into ``br`` tags.
-                 - Intra-GitHub links to commits, repositories, and issues are not
-                   supported.
-                 If you need support for features specific to GitHub comments and issues,
-                 please use :class:`mdx_gfm.GithubFlavoredMarkdownExtension`.
-                 .. _GFM documentation: https://guides.github.com/features/mastering-markdown/
-                 """
-                 def extendMarkdown(self, md, md_globals):
-                     # Built-in extensions
-                     Nl2BrExtension().extendMarkdown(md, md_globals)
-                     FencedCodeExtension().extendMarkdown(md, md_globals)
-                     TableExtension().extendMarkdown(md, md_globals)
-                     # Custom extensions
-                     gfm.AutolinkExtension().extendMarkdown(md, md_globals)
-                     gfm.AutomailExtension().extendMarkdown(md, md_globals)
-                     gfm.HiddenHiliteExtension([
-                         ('guess_lang', 'False'),
-                         ('css_class', 'highlight')
-                     ]).extendMarkdown(md, md_globals)
-                     gfm.SemiSaneListExtension().extendMarkdown(md, md_globals)
-                     gfm.SpacedLinkExtension().extendMarkdown(md, md_globals)
-                     gfm.StrikethroughExtension().extendMarkdown(md, md_globals)
-                     gfm.TaskListExtension([
-                         ('list_attrs', {'class': 'checkbox'})
-                     ]).extendMarkdown(md, md_globals)
+             class Nl2BrExtension(_Nl2BrExtension):
+                 pass
              # Global Vars
                      return el
-             class UrlizeExtension(markdown.Extension):
+             class UrlizeExtension(Extension):
                  """ Urlize Extension for Python-Markdown. """
-                 def extendMarkdown(self, md, md_globals):
+                 def extendMarkdown(self, md):
                      """ Replace autolink with UrlizePattern """
                      md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md)

rhodecode/lib/markup_renderer.py

0 +37 -70

              import logging
              import urllib.parse
              import bleach
+             import pycmarkgfm
              from mako.lookup import TemplateLookup
              from mako.template import Template as MakoTemplate
              from docutils.writers import html4css1
              import markdown
-             from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
-             from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX)
+             from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX
              log = logging.getLogger(__name__)
              class RhodeCodeWriter(writers.html4css1.Writer):
                  def __init__(self):
-                     writers.Writer.__init__(self)
+                     super(RhodeCodeWriter, self).__init__()
                      self.translator_class = CustomHTMLTranslator
                          else:
                              el.attrib['href'] = relative_path(src, server_paths['standard'])
-                 return lxml.html.tostring(doc)
+                 return lxml.html.tostring(doc, encoding='unicode')
              def relative_path(path, request_path, is_repo_file=None):
                  produces: '/repo/files/logo.png'
                  """
                  # TODO(marcink): unicode/str support ?
-                 # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))
+                 # maybe=> safe_str(urllib.quote(safe_str(final_path), '/:'))
                  def dummy_check(p):
                      return True  # assume default is a valid file path
                  if not path:
                      return request_path
-                 path = safe_unicode(path)
-                 request_path = safe_unicode(request_path)
+                 path = safe_str(path)
+                 request_path = safe_str(request_path)
                  if path.startswith(('data:', 'javascript:', '#', ':')):
                      # skip data, anchor, invalid links
                  if _cached_markdown_renderer is None:
                      _cached_markdown_renderer = markdown.Markdown(
-                         extensions=extensions,
-                         enable_attributes=False, output_format=output_format)
+                         extensions=extensions + ['legacy_attrs'],
+                         output_format=output_format)
                  return _cached_markdown_renderer
-             _cached_markdown_renderer_flavored = None
+             def get_markdown_renderer_flavored(extensions, output_format):
+                 """
+                 Dummy wrapper to mimic markdown API and render github HTML rendered
-             def get_markdown_renderer_flavored(extensions, output_format):
-                 global _cached_markdown_renderer_flavored
+                 """
+                 md = get_markdown_renderer(extensions, output_format)
-                 if _cached_markdown_renderer_flavored is None:
-                     _cached_markdown_renderer_flavored = markdown.Markdown(
-                         extensions=extensions + [GithubFlavoredMarkdownExtension()],
-                         enable_attributes=False, output_format=output_format)
-                 return _cached_markdown_renderer_flavored
+                 class GFM(object):
+                     def convert(self, source):
+                         with pycmarkgfm.parse_gfm(source) as document:
+                             parsed_md = document.to_commonmark()
+                             return md.convert(parsed_md)
+                 return GFM()
              class MarkupRenderer(object):
                      return getattr(MarkupRenderer, detected_renderer)
                  @classmethod
-                 def bleach_clean(cls, text):
+                 def sanitize_html(cls, text):
+                     # TODO: replace this with https://nh3.readthedocs.io/en/latest
+                     # bleach is abandoned and deprecated :/
                      from .bleach_whitelist import markdown_attrs, markdown_tags
                      allowed_tags = markdown_tags
                      allowed_attrs = markdown_attrs
                      try:
                          return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
                      except Exception:
-                         return 'UNPARSEABLE TEXT'
+                         return 'TEXT CANNOT BE PARSED USING SANITIZE'
                  @classmethod
                  def renderer_from_filename(cls, filename, exclude):
                      Renders a given filename using detected renderer
                      it detects renderers based on file extension or mimetype.
                      At last it will just do a simple html replacing new lines with <br/>
-                     :param file_name:
-                     :param source:
                      """
                      renderer = self._detect_renderer(source, filename)
                      return readme_data
                  @classmethod
-                 def _flavored_markdown(cls, text):
-                     """
-                     Github style flavored markdown
-                     :param text:
-                     """
-                     # Extract pre blocks.
-                     extractions = {}
-                     def pre_extraction_callback(matchobj):
-                         digest = md5_safe(matchobj.group(0))
-                         extractions[digest] = matchobj.group(0)
-                         return "{gfm-extraction-%s}" % digest
-                     pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
-                     text = re.sub(pattern, pre_extraction_callback, text)
-                     # Prevent foo_bar_baz from ending up with an italic word in the middle.
-                     def italic_callback(matchobj):
-                         s = matchobj.group(0)
-                         if list(s).count('_') >= 2:
-                             return s.replace('_', r'\_')
-                         return s
-                     text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
-                     # Insert pre block extractions.
-                     def pre_insert_callback(matchobj):
-                         return '\n\n' + extractions[matchobj.group(1)]
-                     text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
-                                   pre_insert_callback, text)
-                     return text
-                 @classmethod
                  def urlify_text(cls, text):
                      def url_func(match_obj):
                          url_full = match_obj.groups()[0]
-                         return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
+                         return f'<a href="{url_full}">{url_full}</a>'
                      return cls.URL_PAT.sub(url_func, text)
                  @classmethod
                  def plain(cls, source, universal_newline=True, leading_newline=True):
-                     source = safe_unicode(source)
+                     source = safe_str(source)
                      if universal_newline:
                          newline = '\n'
                          source = newline.join(source.splitlines())
                          source += '<br />'
                      source += rendered_source.replace("\n", '<br />')
-                     rendered = cls.bleach_clean(source)
+                     rendered = cls.sanitize_html(source)
                      return rendered
                  @classmethod
                          return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                              mentions=False)
-                     source = safe_unicode(source)
+                     try:
+                         rendered = markdown_renderer.convert(source)
-                     try:
-                         if flavored:
-                             source = cls._flavored_markdown(source)
-                         rendered = markdown_renderer.convert(source)
                      except Exception:
                          log.exception('Error when rendering Markdown')
                          if safe:
                              raise
                      if clean_html:
-                         rendered = cls.bleach_clean(rendered)
+                         rendered = cls.sanitize_html(rendered)
                      return rendered
                  @classmethod
                  def rst(cls, source, safe=True, mentions=False, clean_html=False):
                      if mentions:
                          mention_hl = cls.convert_mentions(source, mode='rst')
                          # we extracted mentions render with this using Mentions false
                          return cls.rst(mention_hl, safe=safe, mentions=False)
-                     source = safe_unicode(source)
+                     source = safe_str(source)
                      try:
                          docutils_settings = dict(
                              [(alias, None) for alias in
                              'syntax_highlight': 'short',
                          })
-                         for k, v in docutils_settings.items():
+                         for k, v in list(docutils_settings.items()):
                              directives.register_directive(k, v)
                          parts = publish_parts(source=source,
                                                settings_overrides=docutils_settings)
                          rendered = parts["fragment"]
                          if clean_html:
-                             rendered = cls.bleach_clean(rendered)
+                             rendered = cls.sanitize_html(rendered)
                          return parts['html_title'] + rendered
                      except Exception:
                          log.exception('Error when rendering RST')
                                  if 'source' in cell and cell['cell_type'] == 'markdown':
                                      # sanitize similar like in markdown
-                                     cell['source'] = cls.bleach_clean(cell['source'])
+                                     cell['source'] = cls.sanitize_html(cell['source'])
                              return nb, resources

General Comments 0

Write
Preview

You need to be logged in to leave comments. Log in now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages