# HG changeset patch
# User RhodeCode Admin <admin@rhodecode.com>
# Date 2023-07-18 09:43:49
# Node ID bc1e432b5f3385c3e9d177be69052082cc365512
# Parent  b8ca8ea669d79c368329a879c86f8d05adc195e5

libs: new markdown renderers for python3

diff --git a/rhodecode/lib/markdown_ext.py b/rhodecode/lib/markdown_ext.py
--- a/rhodecode/lib/markdown_ext.py
+++ b/rhodecode/lib/markdown_ext.py
@@ -24,6 +24,8 @@ import xml.etree.ElementTree as etree
 from markdown.extensions import Extension
 from markdown.extensions.fenced_code import FencedCodeExtension
 from markdown.extensions.tables import TableExtension
+from markdown.extensions.nl2br import Nl2BrExtension as _Nl2BrExtension
+from markdown.extensions.wikilinks import WikiLinkExtension
 from markdown.inlinepatterns import Pattern
 
 import gfm
@@ -87,53 +89,8 @@ class SubstituteTagInlineProcessor(Simpl
         return etree.Element(self.tag), m.start(0), m.end(0)
 
 
-class Nl2BrExtension(Extension):
-    BR_RE = r'\n'
-
-    def extendMarkdown(self, md, md_globals):
-        br_tag = SubstituteTagInlineProcessor(self.BR_RE, 'br')
-        md.inlinePatterns.add('nl', br_tag, '_end')
-
-
-class GithubFlavoredMarkdownExtension(Extension):
-    """
-    An extension that is as compatible as possible with GitHub-flavored
-    Markdown (GFM).
-
-    This extension aims to be compatible with the variant of GFM that GitHub
-    uses for Markdown-formatted gists and files (including READMEs). This
-    variant seems to have all the extensions described in the `GFM
-    documentation`_, except:
-
-    - Newlines in paragraphs are not transformed into ``br`` tags.
-    - Intra-GitHub links to commits, repositories, and issues are not
-      supported.
-
-    If you need support for features specific to GitHub comments and issues,
-    please use :class:`mdx_gfm.GithubFlavoredMarkdownExtension`.
-
-    .. _GFM documentation: https://guides.github.com/features/mastering-markdown/
-    """
-
-    def extendMarkdown(self, md, md_globals):
-        # Built-in extensions
-        Nl2BrExtension().extendMarkdown(md, md_globals)
-        FencedCodeExtension().extendMarkdown(md, md_globals)
-        TableExtension().extendMarkdown(md, md_globals)
-
-        # Custom extensions
-        gfm.AutolinkExtension().extendMarkdown(md, md_globals)
-        gfm.AutomailExtension().extendMarkdown(md, md_globals)
-        gfm.HiddenHiliteExtension([
-            ('guess_lang', 'False'),
-            ('css_class', 'highlight')
-        ]).extendMarkdown(md, md_globals)
-        gfm.SemiSaneListExtension().extendMarkdown(md, md_globals)
-        gfm.SpacedLinkExtension().extendMarkdown(md, md_globals)
-        gfm.StrikethroughExtension().extendMarkdown(md, md_globals)
-        gfm.TaskListExtension([
-            ('list_attrs', {'class': 'checkbox'})
-        ]).extendMarkdown(md, md_globals)
+class Nl2BrExtension(_Nl2BrExtension):
+    pass
 
 
 # Global Vars
@@ -167,9 +124,9 @@ class UrlizePattern(markdown.inlinepatte
         return el
 
 
-class UrlizeExtension(markdown.Extension):
+class UrlizeExtension(Extension):
     """ Urlize Extension for Python-Markdown. """
 
-    def extendMarkdown(self, md, md_globals):
+    def extendMarkdown(self, md):
         """ Replace autolink with UrlizePattern """
         md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md)
diff --git a/rhodecode/lib/markup_renderer.py b/rhodecode/lib/markup_renderer.py
--- a/rhodecode/lib/markup_renderer.py
+++ b/rhodecode/lib/markup_renderer.py
@@ -29,6 +29,7 @@ import lxml
 import logging
 import urllib.parse
 import bleach
+import pycmarkgfm
 
 from mako.lookup import TemplateLookup
 from mako.template import Template as MakoTemplate
@@ -39,8 +40,7 @@ from docutils import writers
 from docutils.writers import html4css1
 import markdown
 
-from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
-from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX)
+from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX
 
 log = logging.getLogger(__name__)
 
@@ -81,7 +81,7 @@ class CustomHTMLTranslator(writers.html4
 
 class RhodeCodeWriter(writers.html4css1.Writer):
     def __init__(self):
-        writers.Writer.__init__(self)
+        super(RhodeCodeWriter, self).__init__()
         self.translator_class = CustomHTMLTranslator
 
 
@@ -111,7 +111,7 @@ def relative_links(html_source, server_p
             else:
                 el.attrib['href'] = relative_path(src, server_paths['standard'])
 
-    return lxml.html.tostring(doc)
+    return lxml.html.tostring(doc, encoding='unicode')
 
 
 def relative_path(path, request_path, is_repo_file=None):
@@ -126,7 +126,7 @@ def relative_path(path, request_path, is
     produces: '/repo/files/logo.png'
     """
     # TODO(marcink): unicode/str support ?
-    # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))
+    # maybe=> safe_str(urllib.quote(safe_str(final_path), '/:'))
 
     def dummy_check(p):
         return True  # assume default is a valid file path
@@ -135,8 +135,8 @@ def relative_path(path, request_path, is
     if not path:
         return request_path
 
-    path = safe_unicode(path)
-    request_path = safe_unicode(request_path)
+    path = safe_str(path)
+    request_path = safe_str(request_path)
 
     if path.startswith(('data:', 'javascript:', '#', ':')):
         # skip data, anchor, invalid links
@@ -185,22 +185,25 @@ def get_markdown_renderer(extensions, ou
 
     if _cached_markdown_renderer is None:
         _cached_markdown_renderer = markdown.Markdown(
-            extensions=extensions,
-            enable_attributes=False, output_format=output_format)
+            extensions=extensions + ['legacy_attrs'],
+            output_format=output_format)
     return _cached_markdown_renderer
 
 
-_cached_markdown_renderer_flavored = None
-
+def get_markdown_renderer_flavored(extensions, output_format):
+    """
+    Dummy wrapper that mimics the markdown API to render GitHub-flavored markdown as HTML
 
-def get_markdown_renderer_flavored(extensions, output_format):
-    global _cached_markdown_renderer_flavored
+    """
+    md = get_markdown_renderer(extensions, output_format)
 
-    if _cached_markdown_renderer_flavored is None:
-        _cached_markdown_renderer_flavored = markdown.Markdown(
-            extensions=extensions + [GithubFlavoredMarkdownExtension()],
-            enable_attributes=False, output_format=output_format)
-    return _cached_markdown_renderer_flavored
+    class GFM(object):
+        def convert(self, source):
+            with pycmarkgfm.parse_gfm(source) as document:
+                parsed_md = document.to_commonmark()
+                return md.convert(parsed_md)
+
+    return GFM()
 
 
 class MarkupRenderer(object):
@@ -267,7 +270,10 @@ class MarkupRenderer(object):
         return getattr(MarkupRenderer, detected_renderer)
 
     @classmethod
-    def bleach_clean(cls, text):
+    def sanitize_html(cls, text):
+        # TODO: replace this with https://nh3.readthedocs.io/en/latest
+        # bleach is abandoned and deprecated :/
+
         from .bleach_whitelist import markdown_attrs, markdown_tags
         allowed_tags = markdown_tags
         allowed_attrs = markdown_attrs
@@ -275,7 +281,7 @@ class MarkupRenderer(object):
         try:
             return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
         except Exception:
-            return 'UNPARSEABLE TEXT'
+            return 'TEXT CANNOT BE PARSED USING SANITIZE'
 
     @classmethod
     def renderer_from_filename(cls, filename, exclude):
@@ -302,9 +308,6 @@ class MarkupRenderer(object):
         Renders a given filename using detected renderer
         it detects renderers based on file extension or mimetype.
         At last it will just do a simple html replacing new lines with <br/>
-
-        :param file_name:
-        :param source:
         """
 
         renderer = self._detect_renderer(source, filename)
@@ -312,44 +315,10 @@ class MarkupRenderer(object):
         return readme_data
 
     @classmethod
-    def _flavored_markdown(cls, text):
-        """
-        Github style flavored markdown
-
-        :param text:
-        """
-
-        # Extract pre blocks.
-        extractions = {}
-
-        def pre_extraction_callback(matchobj):
-            digest = md5_safe(matchobj.group(0))
-            extractions[digest] = matchobj.group(0)
-            return "{gfm-extraction-%s}" % digest
-        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
-        text = re.sub(pattern, pre_extraction_callback, text)
-
-        # Prevent foo_bar_baz from ending up with an italic word in the middle.
-        def italic_callback(matchobj):
-            s = matchobj.group(0)
-            if list(s).count('_') >= 2:
-                return s.replace('_', r'\_')
-            return s
-        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
-
-        # Insert pre block extractions.
-        def pre_insert_callback(matchobj):
-            return '\n\n' + extractions[matchobj.group(1)]
-        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
-                      pre_insert_callback, text)
-
-        return text
-
-    @classmethod
     def urlify_text(cls, text):
         def url_func(match_obj):
             url_full = match_obj.groups()[0]
-            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
+            return f'<a href="{url_full}">{url_full}</a>'
 
         return cls.URL_PAT.sub(url_func, text)
 
@@ -375,7 +344,7 @@ class MarkupRenderer(object):
 
     @classmethod
     def plain(cls, source, universal_newline=True, leading_newline=True):
-        source = safe_unicode(source)
+        source = safe_str(source)
         if universal_newline:
             newline = '\n'
             source = newline.join(source.splitlines())
@@ -386,7 +355,7 @@ class MarkupRenderer(object):
             source += '<br />'
         source += rendered_source.replace("\n", '<br />')
 
-        rendered = cls.bleach_clean(source)
+        rendered = cls.sanitize_html(source)
         return rendered
 
     @classmethod
@@ -409,12 +378,9 @@ class MarkupRenderer(object):
             return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                 mentions=False)
 
-        source = safe_unicode(source)
+        try:
+            rendered = markdown_renderer.convert(source)
 
-        try:
-            if flavored:
-                source = cls._flavored_markdown(source)
-            rendered = markdown_renderer.convert(source)
         except Exception:
             log.exception('Error when rendering Markdown')
             if safe:
@@ -424,17 +390,18 @@ class MarkupRenderer(object):
                 raise
 
         if clean_html:
-            rendered = cls.bleach_clean(rendered)
+            rendered = cls.sanitize_html(rendered)
         return rendered
 
     @classmethod
     def rst(cls, source, safe=True, mentions=False, clean_html=False):
+
         if mentions:
             mention_hl = cls.convert_mentions(source, mode='rst')
             # we extracted mentions render with this using Mentions false
             return cls.rst(mention_hl, safe=safe, mentions=False)
 
-        source = safe_unicode(source)
+        source = safe_str(source)
         try:
             docutils_settings = dict(
                 [(alias, None) for alias in
@@ -446,7 +413,7 @@ class MarkupRenderer(object):
                 'syntax_highlight': 'short',
             })
 
-            for k, v in docutils_settings.items():
+            for k, v in list(docutils_settings.items()):
                 directives.register_directive(k, v)
 
             parts = publish_parts(source=source,
@@ -454,7 +421,7 @@ class MarkupRenderer(object):
                                   settings_overrides=docutils_settings)
             rendered = parts["fragment"]
             if clean_html:
-                rendered = cls.bleach_clean(rendered)
+                rendered = cls.sanitize_html(rendered)
             return parts['html_title'] + rendered
         except Exception:
             log.exception('Error when rendering RST')
@@ -494,7 +461,7 @@ class MarkupRenderer(object):
 
                     if 'source' in cell and cell['cell_type'] == 'markdown':
                         # sanitize similar like in markdown
-                        cell['source'] = cls.bleach_clean(cell['source'])
+                        cell['source'] = cls.sanitize_html(cell['source'])
 
                 return nb, resources