Show More
@@ -24,6 +24,8 b' import xml.etree.ElementTree as etree' | |||
|
24 | 24 | from markdown.extensions import Extension |
|
25 | 25 | from markdown.extensions.fenced_code import FencedCodeExtension |
|
26 | 26 | from markdown.extensions.tables import TableExtension |
|
27 | from markdown.extensions.nl2br import Nl2BrExtension as _Nl2BrExtension | |
|
28 | from markdown.extensions.wikilinks import WikiLinkExtension | |
|
27 | 29 | from markdown.inlinepatterns import Pattern |
|
28 | 30 | |
|
29 | 31 | import gfm |
@@ -87,53 +89,8 b' class SubstituteTagInlineProcessor(Simpl' | |||
|
87 | 89 | return etree.Element(self.tag), m.start(0), m.end(0) |
|
88 | 90 | |
|
89 | 91 | |
|
90 | class Nl2BrExtension(Extension): | |
|
91 | BR_RE = r'\n' | |
|
92 | ||
|
93 | def extendMarkdown(self, md, md_globals): | |
|
94 | br_tag = SubstituteTagInlineProcessor(self.BR_RE, 'br') | |
|
95 | md.inlinePatterns.add('nl', br_tag, '_end') | |
|
96 | ||
|
97 | ||
|
98 | class GithubFlavoredMarkdownExtension(Extension): | |
|
99 | """ | |
|
100 | An extension that is as compatible as possible with GitHub-flavored | |
|
101 | Markdown (GFM). | |
|
102 | ||
|
103 | This extension aims to be compatible with the variant of GFM that GitHub | |
|
104 | uses for Markdown-formatted gists and files (including READMEs). This | |
|
105 | variant seems to have all the extensions described in the `GFM | |
|
106 | documentation`_, except: | |
|
107 | ||
|
108 | - Newlines in paragraphs are not transformed into ``br`` tags. | |
|
109 | - Intra-GitHub links to commits, repositories, and issues are not | |
|
110 | supported. | |
|
111 | ||
|
112 | If you need support for features specific to GitHub comments and issues, | |
|
113 | please use :class:`mdx_gfm.GithubFlavoredMarkdownExtension`. | |
|
114 | ||
|
115 | .. _GFM documentation: https://guides.github.com/features/mastering-markdown/ | |
|
116 | """ | |
|
117 | ||
|
118 | def extendMarkdown(self, md, md_globals): | |
|
119 | # Built-in extensions | |
|
120 | Nl2BrExtension().extendMarkdown(md, md_globals) | |
|
121 | FencedCodeExtension().extendMarkdown(md, md_globals) | |
|
122 | TableExtension().extendMarkdown(md, md_globals) | |
|
123 | ||
|
124 | # Custom extensions | |
|
125 | gfm.AutolinkExtension().extendMarkdown(md, md_globals) | |
|
126 | gfm.AutomailExtension().extendMarkdown(md, md_globals) | |
|
127 | gfm.HiddenHiliteExtension([ | |
|
128 | ('guess_lang', 'False'), | |
|
129 | ('css_class', 'highlight') | |
|
130 | ]).extendMarkdown(md, md_globals) | |
|
131 | gfm.SemiSaneListExtension().extendMarkdown(md, md_globals) | |
|
132 | gfm.SpacedLinkExtension().extendMarkdown(md, md_globals) | |
|
133 | gfm.StrikethroughExtension().extendMarkdown(md, md_globals) | |
|
134 | gfm.TaskListExtension([ | |
|
135 | ('list_attrs', {'class': 'checkbox'}) | |
|
136 | ]).extendMarkdown(md, md_globals) | |
|
92 | class Nl2BrExtension(_Nl2BrExtension): | |
|
93 | pass | |
|
137 | 94 | |
|
138 | 95 | |
|
139 | 96 | # Global Vars |
@@ -167,9 +124,9 b' class UrlizePattern(markdown.inlinepatte' | |||
|
167 | 124 | return el |
|
168 | 125 | |
|
169 | 126 | |
|
170 |
class UrlizeExtension( |
|
|
127 | class UrlizeExtension(Extension): | |
|
171 | 128 | """ Urlize Extension for Python-Markdown. """ |
|
172 | 129 | |
|
173 |
def extendMarkdown(self, md |
|
|
130 | def extendMarkdown(self, md): | |
|
174 | 131 | """ Replace autolink with UrlizePattern """ |
|
175 | 132 | md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md) |
@@ -29,6 +29,7 b' import lxml' | |||
|
29 | 29 | import logging |
|
30 | 30 | import urllib.parse |
|
31 | 31 | import bleach |
|
32 | import pycmarkgfm | |
|
32 | 33 | |
|
33 | 34 | from mako.lookup import TemplateLookup |
|
34 | 35 | from mako.template import Template as MakoTemplate |
@@ -39,8 +40,7 b' from docutils import writers' | |||
|
39 | 40 | from docutils.writers import html4css1 |
|
40 | 41 | import markdown |
|
41 | 42 | |
|
42 | from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension | |
|
43 | from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX) | |
|
43 | from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX | |
|
44 | 44 | |
|
45 | 45 | log = logging.getLogger(__name__) |
|
46 | 46 | |
@@ -81,7 +81,7 b' class CustomHTMLTranslator(writers.html4' | |||
|
81 | 81 | |
|
82 | 82 | class RhodeCodeWriter(writers.html4css1.Writer): |
|
83 | 83 | def __init__(self): |
|
84 |
|
|
|
84 | super(RhodeCodeWriter, self).__init__() | |
|
85 | 85 | self.translator_class = CustomHTMLTranslator |
|
86 | 86 | |
|
87 | 87 | |
@@ -111,7 +111,7 b' def relative_links(html_source, server_p' | |||
|
111 | 111 | else: |
|
112 | 112 | el.attrib['href'] = relative_path(src, server_paths['standard']) |
|
113 | 113 | |
|
114 | return lxml.html.tostring(doc) | |
|
114 | return lxml.html.tostring(doc, encoding='unicode') | |
|
115 | 115 | |
|
116 | 116 | |
|
117 | 117 | def relative_path(path, request_path, is_repo_file=None): |
@@ -126,7 +126,7 b' def relative_path(path, request_path, is' | |||
|
126 | 126 | produces: '/repo/files/logo.png' |
|
127 | 127 | """ |
|
128 | 128 | # TODO(marcink): unicode/str support ? |
|
129 |
# maybe=> safe_ |
|
|
129 | # maybe=> safe_str(urllib.quote(safe_str(final_path), '/:')) | |
|
130 | 130 | |
|
131 | 131 | def dummy_check(p): |
|
132 | 132 | return True # assume default is a valid file path |
@@ -135,8 +135,8 b' def relative_path(path, request_path, is' | |||
|
135 | 135 | if not path: |
|
136 | 136 | return request_path |
|
137 | 137 | |
|
138 |
path = safe_ |
|
|
139 |
request_path = safe_ |
|
|
138 | path = safe_str(path) | |
|
139 | request_path = safe_str(request_path) | |
|
140 | 140 | |
|
141 | 141 | if path.startswith(('data:', 'javascript:', '#', ':')): |
|
142 | 142 | # skip data, anchor, invalid links |
@@ -185,22 +185,25 b' def get_markdown_renderer(extensions, ou' | |||
|
185 | 185 | |
|
186 | 186 | if _cached_markdown_renderer is None: |
|
187 | 187 | _cached_markdown_renderer = markdown.Markdown( |
|
188 | extensions=extensions, | |
|
189 |
|
|
|
188 | extensions=extensions + ['legacy_attrs'], | |
|
189 | output_format=output_format) | |
|
190 | 190 | return _cached_markdown_renderer |
|
191 | 191 | |
|
192 | 192 | |
|
193 | _cached_markdown_renderer_flavored = None | |
|
194 | ||
|
193 | def get_markdown_renderer_flavored(extensions, output_format): | |
|
194 | """ | |
|
195 | Dummy wrapper to mimic markdown API and render github HTML rendered | |
|
195 | 196 |
|
|
196 | def get_markdown_renderer_flavored(extensions, output_format): | |
|
197 | global _cached_markdown_renderer_flavored | |
|
197 | """ | |
|
198 | md = get_markdown_renderer(extensions, output_format) | |
|
198 | 199 | |
|
199 | if _cached_markdown_renderer_flavored is None: | |
|
200 | _cached_markdown_renderer_flavored = markdown.Markdown( | |
|
201 | extensions=extensions + [GithubFlavoredMarkdownExtension()], | |
|
202 | enable_attributes=False, output_format=output_format) | |
|
203 | return _cached_markdown_renderer_flavored | |
|
200 | class GFM(object): | |
|
201 | def convert(self, source): | |
|
202 | with pycmarkgfm.parse_gfm(source) as document: | |
|
203 | parsed_md = document.to_commonmark() | |
|
204 | return md.convert(parsed_md) | |
|
205 | ||
|
206 | return GFM() | |
|
204 | 207 | |
|
205 | 208 | |
|
206 | 209 | class MarkupRenderer(object): |
@@ -267,7 +270,10 b' class MarkupRenderer(object):' | |||
|
267 | 270 | return getattr(MarkupRenderer, detected_renderer) |
|
268 | 271 | |
|
269 | 272 | @classmethod |
|
270 |
def |
|
|
273 | def sanitize_html(cls, text): | |
|
274 | # TODO: replace this with https://nh3.readthedocs.io/en/latest | |
|
275 | # bleach is abandoned and deprecated :/ | |
|
276 | ||
|
271 | 277 | from .bleach_whitelist import markdown_attrs, markdown_tags |
|
272 | 278 | allowed_tags = markdown_tags |
|
273 | 279 | allowed_attrs = markdown_attrs |
@@ -275,7 +281,7 b' class MarkupRenderer(object):' | |||
|
275 | 281 | try: |
|
276 | 282 | return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs) |
|
277 | 283 | except Exception: |
|
278 |
return ' |
|
|
284 | return 'TEXT CANNOT BE PARSED USING SANITIZE' | |
|
279 | 285 | |
|
280 | 286 | @classmethod |
|
281 | 287 | def renderer_from_filename(cls, filename, exclude): |
@@ -302,9 +308,6 b' class MarkupRenderer(object):' | |||
|
302 | 308 | Renders a given filename using detected renderer |
|
303 | 309 | it detects renderers based on file extension or mimetype. |
|
304 | 310 | At last it will just do a simple html replacing new lines with <br/> |
|
305 | ||
|
306 | :param file_name: | |
|
307 | :param source: | |
|
308 | 311 | """ |
|
309 | 312 | |
|
310 | 313 | renderer = self._detect_renderer(source, filename) |
@@ -312,44 +315,10 b' class MarkupRenderer(object):' | |||
|
312 | 315 | return readme_data |
|
313 | 316 | |
|
314 | 317 | @classmethod |
|
315 | def _flavored_markdown(cls, text): | |
|
316 | """ | |
|
317 | Github style flavored markdown | |
|
318 | ||
|
319 | :param text: | |
|
320 | """ | |
|
321 | ||
|
322 | # Extract pre blocks. | |
|
323 | extractions = {} | |
|
324 | ||
|
325 | def pre_extraction_callback(matchobj): | |
|
326 | digest = md5_safe(matchobj.group(0)) | |
|
327 | extractions[digest] = matchobj.group(0) | |
|
328 | return "{gfm-extraction-%s}" % digest | |
|
329 | pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL) | |
|
330 | text = re.sub(pattern, pre_extraction_callback, text) | |
|
331 | ||
|
332 | # Prevent foo_bar_baz from ending up with an italic word in the middle. | |
|
333 | def italic_callback(matchobj): | |
|
334 | s = matchobj.group(0) | |
|
335 | if list(s).count('_') >= 2: | |
|
336 | return s.replace('_', r'\_') | |
|
337 | return s | |
|
338 | text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text) | |
|
339 | ||
|
340 | # Insert pre block extractions. | |
|
341 | def pre_insert_callback(matchobj): | |
|
342 | return '\n\n' + extractions[matchobj.group(1)] | |
|
343 | text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}', | |
|
344 | pre_insert_callback, text) | |
|
345 | ||
|
346 | return text | |
|
347 | ||
|
348 | @classmethod | |
|
349 | 318 | def urlify_text(cls, text): |
|
350 | 319 | def url_func(match_obj): |
|
351 | 320 | url_full = match_obj.groups()[0] |
|
352 |
return '<a href=" |
|
|
321 | return f'<a href="{url_full}">{url_full}</a>' | |
|
353 | 322 | |
|
354 | 323 | return cls.URL_PAT.sub(url_func, text) |
|
355 | 324 | |
@@ -375,7 +344,7 b' class MarkupRenderer(object):' | |||
|
375 | 344 | |
|
376 | 345 | @classmethod |
|
377 | 346 | def plain(cls, source, universal_newline=True, leading_newline=True): |
|
378 |
source = safe_ |
|
|
347 | source = safe_str(source) | |
|
379 | 348 | if universal_newline: |
|
380 | 349 | newline = '\n' |
|
381 | 350 | source = newline.join(source.splitlines()) |
@@ -386,7 +355,7 b' class MarkupRenderer(object):' | |||
|
386 | 355 | source += '<br />' |
|
387 | 356 | source += rendered_source.replace("\n", '<br />') |
|
388 | 357 | |
|
389 |
rendered = cls. |
|
|
358 | rendered = cls.sanitize_html(source) | |
|
390 | 359 | return rendered |
|
391 | 360 | |
|
392 | 361 | @classmethod |
@@ -409,12 +378,9 b' class MarkupRenderer(object):' | |||
|
409 | 378 | return cls.markdown(mention_hl, safe=safe, flavored=flavored, |
|
410 | 379 | mentions=False) |
|
411 | 380 | |
|
412 | source = safe_unicode(source) | |
|
381 | try: | |
|
382 | rendered = markdown_renderer.convert(source) | |
|
413 | 383 | |
|
414 | try: | |
|
415 | if flavored: | |
|
416 | source = cls._flavored_markdown(source) | |
|
417 | rendered = markdown_renderer.convert(source) | |
|
418 | 384 | except Exception: |
|
419 | 385 | log.exception('Error when rendering Markdown') |
|
420 | 386 | if safe: |
@@ -424,17 +390,18 b' class MarkupRenderer(object):' | |||
|
424 | 390 | raise |
|
425 | 391 | |
|
426 | 392 | if clean_html: |
|
427 |
rendered = cls. |
|
|
393 | rendered = cls.sanitize_html(rendered) | |
|
428 | 394 | return rendered |
|
429 | 395 | |
|
430 | 396 | @classmethod |
|
431 | 397 | def rst(cls, source, safe=True, mentions=False, clean_html=False): |
|
398 | ||
|
432 | 399 | if mentions: |
|
433 | 400 | mention_hl = cls.convert_mentions(source, mode='rst') |
|
434 | 401 | # we extracted mentions render with this using Mentions false |
|
435 | 402 | return cls.rst(mention_hl, safe=safe, mentions=False) |
|
436 | 403 | |
|
437 |
source = safe_ |
|
|
404 | source = safe_str(source) | |
|
438 | 405 | try: |
|
439 | 406 | docutils_settings = dict( |
|
440 | 407 | [(alias, None) for alias in |
@@ -446,7 +413,7 b' class MarkupRenderer(object):' | |||
|
446 | 413 | 'syntax_highlight': 'short', |
|
447 | 414 | }) |
|
448 | 415 | |
|
449 | for k, v in docutils_settings.items(): | |
|
416 | for k, v in list(docutils_settings.items()): | |
|
450 | 417 | directives.register_directive(k, v) |
|
451 | 418 | |
|
452 | 419 | parts = publish_parts(source=source, |
@@ -454,7 +421,7 b' class MarkupRenderer(object):' | |||
|
454 | 421 | settings_overrides=docutils_settings) |
|
455 | 422 | rendered = parts["fragment"] |
|
456 | 423 | if clean_html: |
|
457 |
rendered = cls. |
|
|
424 | rendered = cls.sanitize_html(rendered) | |
|
458 | 425 | return parts['html_title'] + rendered |
|
459 | 426 | except Exception: |
|
460 | 427 | log.exception('Error when rendering RST') |
@@ -494,7 +461,7 b' class MarkupRenderer(object):' | |||
|
494 | 461 | |
|
495 | 462 | if 'source' in cell and cell['cell_type'] == 'markdown': |
|
496 | 463 | # sanitize similar like in markdown |
|
497 |
cell['source'] = cls. |
|
|
464 | cell['source'] = cls.sanitize_html(cell['source']) | |
|
498 | 465 | |
|
499 | 466 | return nb, resources |
|
500 | 467 |
General Comments 0
You need to be logged in to leave comments.
Login now