Show More
@@ -24,6 +24,8 b' import xml.etree.ElementTree as etree' | |||||
24 | from markdown.extensions import Extension |
|
24 | from markdown.extensions import Extension | |
25 | from markdown.extensions.fenced_code import FencedCodeExtension |
|
25 | from markdown.extensions.fenced_code import FencedCodeExtension | |
26 | from markdown.extensions.tables import TableExtension |
|
26 | from markdown.extensions.tables import TableExtension | |
|
27 | from markdown.extensions.nl2br import Nl2BrExtension as _Nl2BrExtension | |||
|
28 | from markdown.extensions.wikilinks import WikiLinkExtension | |||
27 | from markdown.inlinepatterns import Pattern |
|
29 | from markdown.inlinepatterns import Pattern | |
28 |
|
30 | |||
29 | import gfm |
|
31 | import gfm | |
@@ -87,53 +89,8 b' class SubstituteTagInlineProcessor(Simpl' | |||||
87 | return etree.Element(self.tag), m.start(0), m.end(0) |
|
89 | return etree.Element(self.tag), m.start(0), m.end(0) | |
88 |
|
90 | |||
89 |
|
91 | |||
90 | class Nl2BrExtension(Extension): |
|
92 | class Nl2BrExtension(_Nl2BrExtension): | |
91 | BR_RE = r'\n' |
|
93 | pass | |
92 |
|
||||
93 | def extendMarkdown(self, md, md_globals): |
|
|||
94 | br_tag = SubstituteTagInlineProcessor(self.BR_RE, 'br') |
|
|||
95 | md.inlinePatterns.add('nl', br_tag, '_end') |
|
|||
96 |
|
||||
97 |
|
||||
98 | class GithubFlavoredMarkdownExtension(Extension): |
|
|||
99 | """ |
|
|||
100 | An extension that is as compatible as possible with GitHub-flavored |
|
|||
101 | Markdown (GFM). |
|
|||
102 |
|
||||
103 | This extension aims to be compatible with the variant of GFM that GitHub |
|
|||
104 | uses for Markdown-formatted gists and files (including READMEs). This |
|
|||
105 | variant seems to have all the extensions described in the `GFM |
|
|||
106 | documentation`_, except: |
|
|||
107 |
|
||||
108 | - Newlines in paragraphs are not transformed into ``br`` tags. |
|
|||
109 | - Intra-GitHub links to commits, repositories, and issues are not |
|
|||
110 | supported. |
|
|||
111 |
|
||||
112 | If you need support for features specific to GitHub comments and issues, |
|
|||
113 | please use :class:`mdx_gfm.GithubFlavoredMarkdownExtension`. |
|
|||
114 |
|
||||
115 | .. _GFM documentation: https://guides.github.com/features/mastering-markdown/ |
|
|||
116 | """ |
|
|||
117 |
|
||||
118 | def extendMarkdown(self, md, md_globals): |
|
|||
119 | # Built-in extensions |
|
|||
120 | Nl2BrExtension().extendMarkdown(md, md_globals) |
|
|||
121 | FencedCodeExtension().extendMarkdown(md, md_globals) |
|
|||
122 | TableExtension().extendMarkdown(md, md_globals) |
|
|||
123 |
|
||||
124 | # Custom extensions |
|
|||
125 | gfm.AutolinkExtension().extendMarkdown(md, md_globals) |
|
|||
126 | gfm.AutomailExtension().extendMarkdown(md, md_globals) |
|
|||
127 | gfm.HiddenHiliteExtension([ |
|
|||
128 | ('guess_lang', 'False'), |
|
|||
129 | ('css_class', 'highlight') |
|
|||
130 | ]).extendMarkdown(md, md_globals) |
|
|||
131 | gfm.SemiSaneListExtension().extendMarkdown(md, md_globals) |
|
|||
132 | gfm.SpacedLinkExtension().extendMarkdown(md, md_globals) |
|
|||
133 | gfm.StrikethroughExtension().extendMarkdown(md, md_globals) |
|
|||
134 | gfm.TaskListExtension([ |
|
|||
135 | ('list_attrs', {'class': 'checkbox'}) |
|
|||
136 | ]).extendMarkdown(md, md_globals) |
|
|||
137 |
|
94 | |||
138 |
|
95 | |||
139 | # Global Vars |
|
96 | # Global Vars | |
@@ -167,9 +124,9 b' class UrlizePattern(markdown.inlinepatte' | |||||
167 | return el |
|
124 | return el | |
168 |
|
125 | |||
169 |
|
126 | |||
170 |
class UrlizeExtension( |
|
127 | class UrlizeExtension(Extension): | |
171 | """ Urlize Extension for Python-Markdown. """ |
|
128 | """ Urlize Extension for Python-Markdown. """ | |
172 |
|
129 | |||
173 |
def extendMarkdown(self, md |
|
130 | def extendMarkdown(self, md): | |
174 | """ Replace autolink with UrlizePattern """ |
|
131 | """ Replace autolink with UrlizePattern """ | |
175 | md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md) |
|
132 | md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md) |
@@ -29,6 +29,7 b' import lxml' | |||||
29 | import logging |
|
29 | import logging | |
30 | import urllib.parse |
|
30 | import urllib.parse | |
31 | import bleach |
|
31 | import bleach | |
|
32 | import pycmarkgfm | |||
32 |
|
33 | |||
33 | from mako.lookup import TemplateLookup |
|
34 | from mako.lookup import TemplateLookup | |
34 | from mako.template import Template as MakoTemplate |
|
35 | from mako.template import Template as MakoTemplate | |
@@ -39,8 +40,7 b' from docutils import writers' | |||||
39 | from docutils.writers import html4css1 |
|
40 | from docutils.writers import html4css1 | |
40 | import markdown |
|
41 | import markdown | |
41 |
|
42 | |||
42 | from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension |
|
43 | from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX | |
43 | from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX) |
|
|||
44 |
|
44 | |||
45 | log = logging.getLogger(__name__) |
|
45 | log = logging.getLogger(__name__) | |
46 |
|
46 | |||
@@ -81,7 +81,7 b' class CustomHTMLTranslator(writers.html4' | |||||
81 |
|
81 | |||
82 | class RhodeCodeWriter(writers.html4css1.Writer): |
|
82 | class RhodeCodeWriter(writers.html4css1.Writer): | |
83 | def __init__(self): |
|
83 | def __init__(self): | |
84 |
|
|
84 | super(RhodeCodeWriter, self).__init__() | |
85 | self.translator_class = CustomHTMLTranslator |
|
85 | self.translator_class = CustomHTMLTranslator | |
86 |
|
86 | |||
87 |
|
87 | |||
@@ -111,7 +111,7 b' def relative_links(html_source, server_p' | |||||
111 | else: |
|
111 | else: | |
112 | el.attrib['href'] = relative_path(src, server_paths['standard']) |
|
112 | el.attrib['href'] = relative_path(src, server_paths['standard']) | |
113 |
|
113 | |||
114 | return lxml.html.tostring(doc) |
|
114 | return lxml.html.tostring(doc, encoding='unicode') | |
115 |
|
115 | |||
116 |
|
116 | |||
117 | def relative_path(path, request_path, is_repo_file=None): |
|
117 | def relative_path(path, request_path, is_repo_file=None): | |
@@ -126,7 +126,7 b' def relative_path(path, request_path, is' | |||||
126 | produces: '/repo/files/logo.png' |
|
126 | produces: '/repo/files/logo.png' | |
127 | """ |
|
127 | """ | |
128 | # TODO(marcink): unicode/str support ? |
|
128 | # TODO(marcink): unicode/str support ? | |
129 |
# maybe=> safe_ |
|
129 | # maybe=> safe_str(urllib.quote(safe_str(final_path), '/:')) | |
130 |
|
130 | |||
131 | def dummy_check(p): |
|
131 | def dummy_check(p): | |
132 | return True # assume default is a valid file path |
|
132 | return True # assume default is a valid file path | |
@@ -135,8 +135,8 b' def relative_path(path, request_path, is' | |||||
135 | if not path: |
|
135 | if not path: | |
136 | return request_path |
|
136 | return request_path | |
137 |
|
137 | |||
138 |
path = safe_ |
|
138 | path = safe_str(path) | |
139 |
request_path = safe_ |
|
139 | request_path = safe_str(request_path) | |
140 |
|
140 | |||
141 | if path.startswith(('data:', 'javascript:', '#', ':')): |
|
141 | if path.startswith(('data:', 'javascript:', '#', ':')): | |
142 | # skip data, anchor, invalid links |
|
142 | # skip data, anchor, invalid links | |
@@ -185,22 +185,25 b' def get_markdown_renderer(extensions, ou' | |||||
185 |
|
185 | |||
186 | if _cached_markdown_renderer is None: |
|
186 | if _cached_markdown_renderer is None: | |
187 | _cached_markdown_renderer = markdown.Markdown( |
|
187 | _cached_markdown_renderer = markdown.Markdown( | |
188 | extensions=extensions, |
|
188 | extensions=extensions + ['legacy_attrs'], | |
189 |
|
|
189 | output_format=output_format) | |
190 | return _cached_markdown_renderer |
|
190 | return _cached_markdown_renderer | |
191 |
|
191 | |||
192 |
|
192 | |||
193 | _cached_markdown_renderer_flavored = None |
|
193 | def get_markdown_renderer_flavored(extensions, output_format): | |
194 |
|
194 | """ | ||
|
195 | Dummy wrapper to mimic markdown API and render github HTML rendered | |||
195 |
|
|
196 | ||
196 | def get_markdown_renderer_flavored(extensions, output_format): |
|
197 | """ | |
197 | global _cached_markdown_renderer_flavored |
|
198 | md = get_markdown_renderer(extensions, output_format) | |
198 |
|
199 | |||
199 | if _cached_markdown_renderer_flavored is None: |
|
200 | class GFM(object): | |
200 | _cached_markdown_renderer_flavored = markdown.Markdown( |
|
201 | def convert(self, source): | |
201 | extensions=extensions + [GithubFlavoredMarkdownExtension()], |
|
202 | with pycmarkgfm.parse_gfm(source) as document: | |
202 | enable_attributes=False, output_format=output_format) |
|
203 | parsed_md = document.to_commonmark() | |
203 | return _cached_markdown_renderer_flavored |
|
204 | return md.convert(parsed_md) | |
|
205 | ||||
|
206 | return GFM() | |||
204 |
|
207 | |||
205 |
|
208 | |||
206 | class MarkupRenderer(object): |
|
209 | class MarkupRenderer(object): | |
@@ -267,7 +270,10 b' class MarkupRenderer(object):' | |||||
267 | return getattr(MarkupRenderer, detected_renderer) |
|
270 | return getattr(MarkupRenderer, detected_renderer) | |
268 |
|
271 | |||
269 | @classmethod |
|
272 | @classmethod | |
270 |
def |
|
273 | def sanitize_html(cls, text): | |
|
274 | # TODO: replace this with https://nh3.readthedocs.io/en/latest | |||
|
275 | # bleach is abandoned and deprecated :/ | |||
|
276 | ||||
271 | from .bleach_whitelist import markdown_attrs, markdown_tags |
|
277 | from .bleach_whitelist import markdown_attrs, markdown_tags | |
272 | allowed_tags = markdown_tags |
|
278 | allowed_tags = markdown_tags | |
273 | allowed_attrs = markdown_attrs |
|
279 | allowed_attrs = markdown_attrs | |
@@ -275,7 +281,7 b' class MarkupRenderer(object):' | |||||
275 | try: |
|
281 | try: | |
276 | return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs) |
|
282 | return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs) | |
277 | except Exception: |
|
283 | except Exception: | |
278 |
return ' |
|
284 | return 'TEXT CANNOT BE PARSED USING SANITIZE' | |
279 |
|
285 | |||
280 | @classmethod |
|
286 | @classmethod | |
281 | def renderer_from_filename(cls, filename, exclude): |
|
287 | def renderer_from_filename(cls, filename, exclude): | |
@@ -302,9 +308,6 b' class MarkupRenderer(object):' | |||||
302 | Renders a given filename using detected renderer |
|
308 | Renders a given filename using detected renderer | |
303 | it detects renderers based on file extension or mimetype. |
|
309 | it detects renderers based on file extension or mimetype. | |
304 | At last it will just do a simple html replacing new lines with <br/> |
|
310 | At last it will just do a simple html replacing new lines with <br/> | |
305 |
|
||||
306 | :param file_name: |
|
|||
307 | :param source: |
|
|||
308 | """ |
|
311 | """ | |
309 |
|
312 | |||
310 | renderer = self._detect_renderer(source, filename) |
|
313 | renderer = self._detect_renderer(source, filename) | |
@@ -312,44 +315,10 b' class MarkupRenderer(object):' | |||||
312 | return readme_data |
|
315 | return readme_data | |
313 |
|
316 | |||
314 | @classmethod |
|
317 | @classmethod | |
315 | def _flavored_markdown(cls, text): |
|
|||
316 | """ |
|
|||
317 | Github style flavored markdown |
|
|||
318 |
|
||||
319 | :param text: |
|
|||
320 | """ |
|
|||
321 |
|
||||
322 | # Extract pre blocks. |
|
|||
323 | extractions = {} |
|
|||
324 |
|
||||
325 | def pre_extraction_callback(matchobj): |
|
|||
326 | digest = md5_safe(matchobj.group(0)) |
|
|||
327 | extractions[digest] = matchobj.group(0) |
|
|||
328 | return "{gfm-extraction-%s}" % digest |
|
|||
329 | pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL) |
|
|||
330 | text = re.sub(pattern, pre_extraction_callback, text) |
|
|||
331 |
|
||||
332 | # Prevent foo_bar_baz from ending up with an italic word in the middle. |
|
|||
333 | def italic_callback(matchobj): |
|
|||
334 | s = matchobj.group(0) |
|
|||
335 | if list(s).count('_') >= 2: |
|
|||
336 | return s.replace('_', r'\_') |
|
|||
337 | return s |
|
|||
338 | text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text) |
|
|||
339 |
|
||||
340 | # Insert pre block extractions. |
|
|||
341 | def pre_insert_callback(matchobj): |
|
|||
342 | return '\n\n' + extractions[matchobj.group(1)] |
|
|||
343 | text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}', |
|
|||
344 | pre_insert_callback, text) |
|
|||
345 |
|
||||
346 | return text |
|
|||
347 |
|
||||
348 | @classmethod |
|
|||
349 | def urlify_text(cls, text): |
|
318 | def urlify_text(cls, text): | |
350 | def url_func(match_obj): |
|
319 | def url_func(match_obj): | |
351 | url_full = match_obj.groups()[0] |
|
320 | url_full = match_obj.groups()[0] | |
352 |
return '<a href=" |
|
321 | return f'<a href="{url_full}">{url_full}</a>' | |
353 |
|
322 | |||
354 | return cls.URL_PAT.sub(url_func, text) |
|
323 | return cls.URL_PAT.sub(url_func, text) | |
355 |
|
324 | |||
@@ -375,7 +344,7 b' class MarkupRenderer(object):' | |||||
375 |
|
344 | |||
376 | @classmethod |
|
345 | @classmethod | |
377 | def plain(cls, source, universal_newline=True, leading_newline=True): |
|
346 | def plain(cls, source, universal_newline=True, leading_newline=True): | |
378 |
source = safe_ |
|
347 | source = safe_str(source) | |
379 | if universal_newline: |
|
348 | if universal_newline: | |
380 | newline = '\n' |
|
349 | newline = '\n' | |
381 | source = newline.join(source.splitlines()) |
|
350 | source = newline.join(source.splitlines()) | |
@@ -386,7 +355,7 b' class MarkupRenderer(object):' | |||||
386 | source += '<br />' |
|
355 | source += '<br />' | |
387 | source += rendered_source.replace("\n", '<br />') |
|
356 | source += rendered_source.replace("\n", '<br />') | |
388 |
|
357 | |||
389 |
rendered = cls. |
|
358 | rendered = cls.sanitize_html(source) | |
390 | return rendered |
|
359 | return rendered | |
391 |
|
360 | |||
392 | @classmethod |
|
361 | @classmethod | |
@@ -409,12 +378,9 b' class MarkupRenderer(object):' | |||||
409 | return cls.markdown(mention_hl, safe=safe, flavored=flavored, |
|
378 | return cls.markdown(mention_hl, safe=safe, flavored=flavored, | |
410 | mentions=False) |
|
379 | mentions=False) | |
411 |
|
380 | |||
412 | source = safe_unicode(source) |
|
381 | try: | |
|
382 | rendered = markdown_renderer.convert(source) | |||
413 |
|
383 | |||
414 | try: |
|
|||
415 | if flavored: |
|
|||
416 | source = cls._flavored_markdown(source) |
|
|||
417 | rendered = markdown_renderer.convert(source) |
|
|||
418 | except Exception: |
|
384 | except Exception: | |
419 | log.exception('Error when rendering Markdown') |
|
385 | log.exception('Error when rendering Markdown') | |
420 | if safe: |
|
386 | if safe: | |
@@ -424,17 +390,18 b' class MarkupRenderer(object):' | |||||
424 | raise |
|
390 | raise | |
425 |
|
391 | |||
426 | if clean_html: |
|
392 | if clean_html: | |
427 |
rendered = cls. |
|
393 | rendered = cls.sanitize_html(rendered) | |
428 | return rendered |
|
394 | return rendered | |
429 |
|
395 | |||
430 | @classmethod |
|
396 | @classmethod | |
431 | def rst(cls, source, safe=True, mentions=False, clean_html=False): |
|
397 | def rst(cls, source, safe=True, mentions=False, clean_html=False): | |
|
398 | ||||
432 | if mentions: |
|
399 | if mentions: | |
433 | mention_hl = cls.convert_mentions(source, mode='rst') |
|
400 | mention_hl = cls.convert_mentions(source, mode='rst') | |
434 | # we extracted mentions render with this using Mentions false |
|
401 | # we extracted mentions render with this using Mentions false | |
435 | return cls.rst(mention_hl, safe=safe, mentions=False) |
|
402 | return cls.rst(mention_hl, safe=safe, mentions=False) | |
436 |
|
403 | |||
437 |
source = safe_ |
|
404 | source = safe_str(source) | |
438 | try: |
|
405 | try: | |
439 | docutils_settings = dict( |
|
406 | docutils_settings = dict( | |
440 | [(alias, None) for alias in |
|
407 | [(alias, None) for alias in | |
@@ -446,7 +413,7 b' class MarkupRenderer(object):' | |||||
446 | 'syntax_highlight': 'short', |
|
413 | 'syntax_highlight': 'short', | |
447 | }) |
|
414 | }) | |
448 |
|
415 | |||
449 | for k, v in docutils_settings.items(): |
|
416 | for k, v in list(docutils_settings.items()): | |
450 | directives.register_directive(k, v) |
|
417 | directives.register_directive(k, v) | |
451 |
|
418 | |||
452 | parts = publish_parts(source=source, |
|
419 | parts = publish_parts(source=source, | |
@@ -454,7 +421,7 b' class MarkupRenderer(object):' | |||||
454 | settings_overrides=docutils_settings) |
|
421 | settings_overrides=docutils_settings) | |
455 | rendered = parts["fragment"] |
|
422 | rendered = parts["fragment"] | |
456 | if clean_html: |
|
423 | if clean_html: | |
457 |
rendered = cls. |
|
424 | rendered = cls.sanitize_html(rendered) | |
458 | return parts['html_title'] + rendered |
|
425 | return parts['html_title'] + rendered | |
459 | except Exception: |
|
426 | except Exception: | |
460 | log.exception('Error when rendering RST') |
|
427 | log.exception('Error when rendering RST') | |
@@ -494,7 +461,7 b' class MarkupRenderer(object):' | |||||
494 |
|
461 | |||
495 | if 'source' in cell and cell['cell_type'] == 'markdown': |
|
462 | if 'source' in cell and cell['cell_type'] == 'markdown': | |
496 | # sanitize similar like in markdown |
|
463 | # sanitize similar like in markdown | |
497 |
cell['source'] = cls. |
|
464 | cell['source'] = cls.sanitize_html(cell['source']) | |
498 |
|
465 | |||
499 | return nb, resources |
|
466 | return nb, resources | |
500 |
|
467 |
General Comments 0
You need to be logged in to leave comments.
Login now