diff --git a/rhodecode/lib/markdown_ext.py b/rhodecode/lib/markdown_ext.py --- a/rhodecode/lib/markdown_ext.py +++ b/rhodecode/lib/markdown_ext.py @@ -24,6 +24,8 @@ import xml.etree.ElementTree as etree from markdown.extensions import Extension from markdown.extensions.fenced_code import FencedCodeExtension from markdown.extensions.tables import TableExtension +from markdown.extensions.nl2br import Nl2BrExtension as _Nl2BrExtension +from markdown.extensions.wikilinks import WikiLinkExtension from markdown.inlinepatterns import Pattern import gfm @@ -87,53 +89,8 @@ class SubstituteTagInlineProcessor(Simpl return etree.Element(self.tag), m.start(0), m.end(0) -class Nl2BrExtension(Extension): - BR_RE = r'\n' - - def extendMarkdown(self, md, md_globals): - br_tag = SubstituteTagInlineProcessor(self.BR_RE, 'br') - md.inlinePatterns.add('nl', br_tag, '_end') - - -class GithubFlavoredMarkdownExtension(Extension): - """ - An extension that is as compatible as possible with GitHub-flavored - Markdown (GFM). - - This extension aims to be compatible with the variant of GFM that GitHub - uses for Markdown-formatted gists and files (including READMEs). This - variant seems to have all the extensions described in the `GFM - documentation`_, except: - - - Newlines in paragraphs are not transformed into ``br`` tags. - - Intra-GitHub links to commits, repositories, and issues are not - supported. - - If you need support for features specific to GitHub comments and issues, - please use :class:`mdx_gfm.GithubFlavoredMarkdownExtension`. - - .. 
_GFM documentation: https://guides.github.com/features/mastering-markdown/ - """ - - def extendMarkdown(self, md, md_globals): - # Built-in extensions - Nl2BrExtension().extendMarkdown(md, md_globals) - FencedCodeExtension().extendMarkdown(md, md_globals) - TableExtension().extendMarkdown(md, md_globals) - - # Custom extensions - gfm.AutolinkExtension().extendMarkdown(md, md_globals) - gfm.AutomailExtension().extendMarkdown(md, md_globals) - gfm.HiddenHiliteExtension([ - ('guess_lang', 'False'), - ('css_class', 'highlight') - ]).extendMarkdown(md, md_globals) - gfm.SemiSaneListExtension().extendMarkdown(md, md_globals) - gfm.SpacedLinkExtension().extendMarkdown(md, md_globals) - gfm.StrikethroughExtension().extendMarkdown(md, md_globals) - gfm.TaskListExtension([ - ('list_attrs', {'class': 'checkbox'}) - ]).extendMarkdown(md, md_globals) +class Nl2BrExtension(_Nl2BrExtension): + pass # Global Vars @@ -167,9 +124,9 @@ class UrlizePattern(markdown.inlinepatte return el -class UrlizeExtension(markdown.Extension): +class UrlizeExtension(Extension): """ Urlize Extension for Python-Markdown. 
""" - def extendMarkdown(self, md, md_globals): + def extendMarkdown(self, md): """ Replace autolink with UrlizePattern """ md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md) diff --git a/rhodecode/lib/markup_renderer.py b/rhodecode/lib/markup_renderer.py --- a/rhodecode/lib/markup_renderer.py +++ b/rhodecode/lib/markup_renderer.py @@ -29,6 +29,7 @@ import lxml import logging import urllib.parse import bleach +import pycmarkgfm from mako.lookup import TemplateLookup from mako.template import Template as MakoTemplate @@ -39,8 +40,7 @@ from docutils import writers from docutils.writers import html4css1 import markdown -from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension -from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX) +from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX log = logging.getLogger(__name__) @@ -81,7 +81,7 @@ class CustomHTMLTranslator(writers.html4 class RhodeCodeWriter(writers.html4css1.Writer): def __init__(self): - writers.Writer.__init__(self) + super(RhodeCodeWriter, self).__init__() self.translator_class = CustomHTMLTranslator @@ -111,7 +111,7 @@ def relative_links(html_source, server_p else: el.attrib['href'] = relative_path(src, server_paths['standard']) - return lxml.html.tostring(doc) + return lxml.html.tostring(doc, encoding='unicode') def relative_path(path, request_path, is_repo_file=None): @@ -126,7 +126,7 @@ def relative_path(path, request_path, is produces: '/repo/files/logo.png' """ # TODO(marcink): unicode/str support ? 
- # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:')) + # maybe=> safe_str(urllib.quote(safe_str(final_path), '/:')) def dummy_check(p): return True # assume default is a valid file path @@ -135,8 +135,8 @@ def relative_path(path, request_path, is if not path: return request_path - path = safe_unicode(path) - request_path = safe_unicode(request_path) + path = safe_str(path) + request_path = safe_str(request_path) if path.startswith(('data:', 'javascript:', '#', ':')): # skip data, anchor, invalid links @@ -185,22 +185,25 @@ def get_markdown_renderer(extensions, ou if _cached_markdown_renderer is None: _cached_markdown_renderer = markdown.Markdown( - extensions=extensions, - enable_attributes=False, output_format=output_format) + extensions=extensions + ['legacy_attrs'], + output_format=output_format) return _cached_markdown_renderer -_cached_markdown_renderer_flavored = None - +def get_markdown_renderer_flavored(extensions, output_format): + """ + Dummy wrapper to mimic markdown API and render github HTML rendered -def get_markdown_renderer_flavored(extensions, output_format): - global _cached_markdown_renderer_flavored + """ + md = get_markdown_renderer(extensions, output_format) - if _cached_markdown_renderer_flavored is None: - _cached_markdown_renderer_flavored = markdown.Markdown( - extensions=extensions + [GithubFlavoredMarkdownExtension()], - enable_attributes=False, output_format=output_format) - return _cached_markdown_renderer_flavored + class GFM(object): + def convert(self, source): + with pycmarkgfm.parse_gfm(source) as document: + parsed_md = document.to_commonmark() + return md.convert(parsed_md) + + return GFM() class MarkupRenderer(object): @@ -267,7 +270,10 @@ class MarkupRenderer(object): return getattr(MarkupRenderer, detected_renderer) @classmethod - def bleach_clean(cls, text): + def sanitize_html(cls, text): + # TODO: replace this with https://nh3.readthedocs.io/en/latest + # bleach is abandoned and deprecated :/ + from 
.bleach_whitelist import markdown_attrs, markdown_tags allowed_tags = markdown_tags allowed_attrs = markdown_attrs @@ -275,7 +281,7 @@ class MarkupRenderer(object): try: return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs) except Exception: - return 'UNPARSEABLE TEXT' + return 'TEXT CANNOT BE PARSED USING SANITIZE' @classmethod def renderer_from_filename(cls, filename, exclude): @@ -302,9 +308,6 @@ class MarkupRenderer(object): Renders a given filename using detected renderer it detects renderers based on file extension or mimetype. At last it will just do a simple html replacing new lines with
- - :param file_name: - :param source: """ renderer = self._detect_renderer(source, filename) @@ -312,44 +315,10 @@ class MarkupRenderer(object): return readme_data @classmethod - def _flavored_markdown(cls, text): - """ - Github style flavored markdown - - :param text: - """ - - # Extract pre blocks. - extractions = {} - - def pre_extraction_callback(matchobj): - digest = md5_safe(matchobj.group(0)) - extractions[digest] = matchobj.group(0) - return "{gfm-extraction-%s}" % digest - pattern = re.compile(r'
<pre>.*?</pre>
', re.MULTILINE | re.DOTALL) - text = re.sub(pattern, pre_extraction_callback, text) - - # Prevent foo_bar_baz from ending up with an italic word in the middle. - def italic_callback(matchobj): - s = matchobj.group(0) - if list(s).count('_') >= 2: - return s.replace('_', r'\_') - return s - text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text) - - # Insert pre block extractions. - def pre_insert_callback(matchobj): - return '\n\n' + extractions[matchobj.group(1)] - text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}', - pre_insert_callback, text) - - return text - - @classmethod def urlify_text(cls, text): def url_func(match_obj): url_full = match_obj.groups()[0] - return '%(url)s' % ({'url': url_full}) + return f'{url_full}' return cls.URL_PAT.sub(url_func, text) @@ -375,7 +344,7 @@ class MarkupRenderer(object): @classmethod def plain(cls, source, universal_newline=True, leading_newline=True): - source = safe_unicode(source) + source = safe_str(source) if universal_newline: newline = '\n' source = newline.join(source.splitlines()) @@ -386,7 +355,7 @@ class MarkupRenderer(object): source += '
<br />' source += rendered_source.replace("\n", '<br />
') - rendered = cls.bleach_clean(source) + rendered = cls.sanitize_html(source) return rendered @classmethod @@ -409,12 +378,9 @@ class MarkupRenderer(object): return cls.markdown(mention_hl, safe=safe, flavored=flavored, mentions=False) - source = safe_unicode(source) + try: + rendered = markdown_renderer.convert(source) - try: - if flavored: - source = cls._flavored_markdown(source) - rendered = markdown_renderer.convert(source) except Exception: log.exception('Error when rendering Markdown') if safe: @@ -424,17 +390,18 @@ class MarkupRenderer(object): raise if clean_html: - rendered = cls.bleach_clean(rendered) + rendered = cls.sanitize_html(rendered) return rendered @classmethod def rst(cls, source, safe=True, mentions=False, clean_html=False): + if mentions: mention_hl = cls.convert_mentions(source, mode='rst') # we extracted mentions render with this using Mentions false return cls.rst(mention_hl, safe=safe, mentions=False) - source = safe_unicode(source) + source = safe_str(source) try: docutils_settings = dict( [(alias, None) for alias in @@ -446,7 +413,7 @@ class MarkupRenderer(object): 'syntax_highlight': 'short', }) - for k, v in docutils_settings.items(): + for k, v in list(docutils_settings.items()): directives.register_directive(k, v) parts = publish_parts(source=source, @@ -454,7 +421,7 @@ class MarkupRenderer(object): settings_overrides=docutils_settings) rendered = parts["fragment"] if clean_html: - rendered = cls.bleach_clean(rendered) + rendered = cls.sanitize_html(rendered) return parts['html_title'] + rendered except Exception: log.exception('Error when rendering RST') @@ -494,7 +461,7 @@ class MarkupRenderer(object): if 'source' in cell and cell['cell_type'] == 'markdown': # sanitize similar like in markdown - cell['source'] = cls.bleach_clean(cell['source']) + cell['source'] = cls.sanitize_html(cell['source']) return nb, resources