##// END OF EJS Templates
libs: new markdown renderers for python3
super-admin -
r5079:bc1e432b default
parent child Browse files
Show More
@@ -1,175 +1,132 b''
1
1
2 # Copyright (C) 2010-2020 RhodeCode GmbH
2 # Copyright (C) 2010-2020 RhodeCode GmbH
3 #
3 #
4 # This program is free software: you can redistribute it and/or modify
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License, version 3
5 # it under the terms of the GNU Affero General Public License, version 3
6 # (only), as published by the Free Software Foundation.
6 # (only), as published by the Free Software Foundation.
7 #
7 #
8 # This program is distributed in the hope that it will be useful,
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
11 # GNU General Public License for more details.
12 #
12 #
13 # You should have received a copy of the GNU Affero General Public License
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 #
15 #
16 # This program is dual-licensed. If you wish to learn more about the
16 # This program is dual-licensed. If you wish to learn more about the
17 # RhodeCode Enterprise Edition, including its added features, Support services,
17 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 # and proprietary license terms, please see https://rhodecode.com/licenses/
19
19
20 import re
20 import re
21 import markdown
21 import markdown
22 import xml.etree.ElementTree as etree
22 import xml.etree.ElementTree as etree
23
23
24 from markdown.extensions import Extension
24 from markdown.extensions import Extension
25 from markdown.extensions.fenced_code import FencedCodeExtension
25 from markdown.extensions.fenced_code import FencedCodeExtension
26 from markdown.extensions.tables import TableExtension
26 from markdown.extensions.tables import TableExtension
27 from markdown.extensions.nl2br import Nl2BrExtension as _Nl2BrExtension
28 from markdown.extensions.wikilinks import WikiLinkExtension
27 from markdown.inlinepatterns import Pattern
29 from markdown.inlinepatterns import Pattern
28
30
29 import gfm
31 import gfm
30
32
31
33
32 class InlineProcessor(Pattern):
34 class InlineProcessor(Pattern):
33 """
35 """
34 Base class that inline patterns subclass.
36 Base class that inline patterns subclass.
35 This is the newer style inline processor that uses a more
37 This is the newer style inline processor that uses a more
36 efficient and flexible search approach.
38 efficient and flexible search approach.
37 """
39 """
38
40
39 def __init__(self, pattern, md=None):
41 def __init__(self, pattern, md=None):
40 """
42 """
41 Create an instance of an inline pattern.
43 Create an instance of an inline pattern.
42 Keyword arguments:
44 Keyword arguments:
43 * pattern: A regular expression that matches a pattern
45 * pattern: A regular expression that matches a pattern
44 """
46 """
45 self.pattern = pattern
47 self.pattern = pattern
46 self.compiled_re = re.compile(pattern, re.DOTALL | re.UNICODE)
48 self.compiled_re = re.compile(pattern, re.DOTALL | re.UNICODE)
47
49
48 # Api for Markdown to pass safe_mode into instance
50 # Api for Markdown to pass safe_mode into instance
49 self.safe_mode = False
51 self.safe_mode = False
50 self.md = md
52 self.md = md
51
53
52 def handleMatch(self, m, data):
54 def handleMatch(self, m, data):
53 """Return an ElementTree element from the given match and the
55 """Return an ElementTree element from the given match and the
54 start and end index of the matched text.
56 start and end index of the matched text.
55 If `start` and/or `end` are returned as `None`, it will be
57 If `start` and/or `end` are returned as `None`, it will be
56 assumed that the processor did not find a valid region of text.
58 assumed that the processor did not find a valid region of text.
57 Subclasses should override this method.
59 Subclasses should override this method.
58 Keyword arguments:
60 Keyword arguments:
59 * m: A re match object containing a match of the pattern.
61 * m: A re match object containing a match of the pattern.
60 * data: The buffer currently under analysis
62 * data: The buffer currently under analysis
61 Returns:
63 Returns:
62 * el: The ElementTree element, text or None.
64 * el: The ElementTree element, text or None.
63 * start: The start of the region that has been matched or None.
65 * start: The start of the region that has been matched or None.
64 * end: The end of the region that has been matched or None.
66 * end: The end of the region that has been matched or None.
65 """
67 """
66 pass # pragma: no cover
68 pass # pragma: no cover
67
69
68
70
69 class SimpleTagInlineProcessor(InlineProcessor):
71 class SimpleTagInlineProcessor(InlineProcessor):
70 """
72 """
71 Return element of type `tag` with a text attribute of group(2)
73 Return element of type `tag` with a text attribute of group(2)
72 of a Pattern.
74 of a Pattern.
73 """
75 """
74 def __init__(self, pattern, tag):
76 def __init__(self, pattern, tag):
75 InlineProcessor.__init__(self, pattern)
77 InlineProcessor.__init__(self, pattern)
76 self.tag = tag
78 self.tag = tag
77
79
78 def handleMatch(self, m, data): # pragma: no cover
80 def handleMatch(self, m, data): # pragma: no cover
79 el = etree.Element(self.tag)
81 el = etree.Element(self.tag)
80 el.text = m.group(2)
82 el.text = m.group(2)
81 return el, m.start(0), m.end(0)
83 return el, m.start(0), m.end(0)
82
84
83
85
84 class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
86 class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
85 """ Return an element of type `tag` with no children. """
87 """ Return an element of type `tag` with no children. """
86 def handleMatch(self, m, data):
88 def handleMatch(self, m, data):
87 return etree.Element(self.tag), m.start(0), m.end(0)
89 return etree.Element(self.tag), m.start(0), m.end(0)
88
90
89
91
90 class Nl2BrExtension(Extension):
92 class Nl2BrExtension(_Nl2BrExtension):
91 BR_RE = r'\n'
93 pass
92
93 def extendMarkdown(self, md, md_globals):
94 br_tag = SubstituteTagInlineProcessor(self.BR_RE, 'br')
95 md.inlinePatterns.add('nl', br_tag, '_end')
96
97
98 class GithubFlavoredMarkdownExtension(Extension):
99 """
100 An extension that is as compatible as possible with GitHub-flavored
101 Markdown (GFM).
102
103 This extension aims to be compatible with the variant of GFM that GitHub
104 uses for Markdown-formatted gists and files (including READMEs). This
105 variant seems to have all the extensions described in the `GFM
106 documentation`_, except:
107
108 - Newlines in paragraphs are not transformed into ``br`` tags.
109 - Intra-GitHub links to commits, repositories, and issues are not
110 supported.
111
112 If you need support for features specific to GitHub comments and issues,
113 please use :class:`mdx_gfm.GithubFlavoredMarkdownExtension`.
114
115 .. _GFM documentation: https://guides.github.com/features/mastering-markdown/
116 """
117
118 def extendMarkdown(self, md, md_globals):
119 # Built-in extensions
120 Nl2BrExtension().extendMarkdown(md, md_globals)
121 FencedCodeExtension().extendMarkdown(md, md_globals)
122 TableExtension().extendMarkdown(md, md_globals)
123
124 # Custom extensions
125 gfm.AutolinkExtension().extendMarkdown(md, md_globals)
126 gfm.AutomailExtension().extendMarkdown(md, md_globals)
127 gfm.HiddenHiliteExtension([
128 ('guess_lang', 'False'),
129 ('css_class', 'highlight')
130 ]).extendMarkdown(md, md_globals)
131 gfm.SemiSaneListExtension().extendMarkdown(md, md_globals)
132 gfm.SpacedLinkExtension().extendMarkdown(md, md_globals)
133 gfm.StrikethroughExtension().extendMarkdown(md, md_globals)
134 gfm.TaskListExtension([
135 ('list_attrs', {'class': 'checkbox'})
136 ]).extendMarkdown(md, md_globals)
137
94
138
95
139 # Global Vars
96 # Global Vars
140 URLIZE_RE = '(%s)' % '|'.join([
97 URLIZE_RE = '(%s)' % '|'.join([
141 r'<(?:f|ht)tps?://[^>]*>',
98 r'<(?:f|ht)tps?://[^>]*>',
142 r'\b(?:f|ht)tps?://[^)<>\s]+[^.,)<>\s]',
99 r'\b(?:f|ht)tps?://[^)<>\s]+[^.,)<>\s]',
143 r'\bwww\.[^)<>\s]+[^.,)<>\s]',
100 r'\bwww\.[^)<>\s]+[^.,)<>\s]',
144 r'[^(<\s]+\.(?:com|net|org)\b',
101 r'[^(<\s]+\.(?:com|net|org)\b',
145 ])
102 ])
146
103
147
104
148 class UrlizePattern(markdown.inlinepatterns.Pattern):
105 class UrlizePattern(markdown.inlinepatterns.Pattern):
149 """ Return a link Element given an autolink (`http://example/com`). """
106 """ Return a link Element given an autolink (`http://example/com`). """
150 def handleMatch(self, m):
107 def handleMatch(self, m):
151 url = m.group(2)
108 url = m.group(2)
152
109
153 if url.startswith('<'):
110 if url.startswith('<'):
154 url = url[1:-1]
111 url = url[1:-1]
155
112
156 text = url
113 text = url
157
114
158 if not url.split('://')[0] in ('http','https','ftp'):
115 if not url.split('://')[0] in ('http','https','ftp'):
159 if '@' in url and not '/' in url:
116 if '@' in url and not '/' in url:
160 url = 'mailto:' + url
117 url = 'mailto:' + url
161 else:
118 else:
162 url = 'http://' + url
119 url = 'http://' + url
163
120
164 el = markdown.util.etree.Element("a")
121 el = markdown.util.etree.Element("a")
165 el.set('href', url)
122 el.set('href', url)
166 el.text = markdown.util.AtomicString(text)
123 el.text = markdown.util.AtomicString(text)
167 return el
124 return el
168
125
169
126
170 class UrlizeExtension(markdown.Extension):
127 class UrlizeExtension(Extension):
171 """ Urlize Extension for Python-Markdown. """
128 """ Urlize Extension for Python-Markdown. """
172
129
173 def extendMarkdown(self, md, md_globals):
130 def extendMarkdown(self, md):
174 """ Replace autolink with UrlizePattern """
131 """ Replace autolink with UrlizePattern """
175 md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md)
132 md.inlinePatterns['autolink'] = UrlizePattern(URLIZE_RE, md)
@@ -1,580 +1,547 b''
1
1
2
2
3 # Copyright (C) 2011-2020 RhodeCode GmbH
3 # Copyright (C) 2011-2020 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Renderer for markup languages with ability to parse using rst or markdown
23 Renderer for markup languages with ability to parse using rst or markdown
24 """
24 """
25
25
26 import re
26 import re
27 import os
27 import os
28 import lxml
28 import lxml
29 import logging
29 import logging
30 import urllib.parse
30 import urllib.parse
31 import bleach
31 import bleach
32 import pycmarkgfm
32
33
33 from mako.lookup import TemplateLookup
34 from mako.lookup import TemplateLookup
34 from mako.template import Template as MakoTemplate
35 from mako.template import Template as MakoTemplate
35
36
36 from docutils.core import publish_parts
37 from docutils.core import publish_parts
37 from docutils.parsers.rst import directives
38 from docutils.parsers.rst import directives
38 from docutils import writers
39 from docutils import writers
39 from docutils.writers import html4css1
40 from docutils.writers import html4css1
40 import markdown
41 import markdown
41
42
42 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
43 from rhodecode.lib.utils2 import safe_str, md5_safe, MENTIONS_REGEX
43 from rhodecode.lib.utils2 import (safe_unicode, md5_safe, MENTIONS_REGEX)
44
44
45 log = logging.getLogger(__name__)
45 log = logging.getLogger(__name__)
46
46
47 # default renderer used to generate automated comments
47 # default renderer used to generate automated comments
48 DEFAULT_COMMENTS_RENDERER = 'rst'
48 DEFAULT_COMMENTS_RENDERER = 'rst'
49
49
50 try:
50 try:
51 from lxml.html import fromstring
51 from lxml.html import fromstring
52 from lxml.html import tostring
52 from lxml.html import tostring
53 except ImportError:
53 except ImportError:
54 log.exception('Failed to import lxml')
54 log.exception('Failed to import lxml')
55 fromstring = None
55 fromstring = None
56 tostring = None
56 tostring = None
57
57
58
58
59 class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
59 class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
60 """
60 """
61 Custom HTML Translator used for sandboxing potential
61 Custom HTML Translator used for sandboxing potential
62 JS injections in ref links
62 JS injections in ref links
63 """
63 """
64 def visit_literal_block(self, node):
64 def visit_literal_block(self, node):
65 self.body.append(self.starttag(node, 'pre', CLASS='codehilite literal-block'))
65 self.body.append(self.starttag(node, 'pre', CLASS='codehilite literal-block'))
66
66
67 def visit_reference(self, node):
67 def visit_reference(self, node):
68 if 'refuri' in node.attributes:
68 if 'refuri' in node.attributes:
69 refuri = node['refuri']
69 refuri = node['refuri']
70 if ':' in refuri:
70 if ':' in refuri:
71 prefix, link = refuri.lstrip().split(':', 1)
71 prefix, link = refuri.lstrip().split(':', 1)
72 prefix = prefix or ''
72 prefix = prefix or ''
73
73
74 if prefix.lower() == 'javascript':
74 if prefix.lower() == 'javascript':
75 # we don't allow javascript type of refs...
75 # we don't allow javascript type of refs...
76 node['refuri'] = 'javascript:alert("SandBoxedJavascript")'
76 node['refuri'] = 'javascript:alert("SandBoxedJavascript")'
77
77
78 # old style class requires this...
78 # old style class requires this...
79 return html4css1.HTMLTranslator.visit_reference(self, node)
79 return html4css1.HTMLTranslator.visit_reference(self, node)
80
80
81
81
82 class RhodeCodeWriter(writers.html4css1.Writer):
82 class RhodeCodeWriter(writers.html4css1.Writer):
83 def __init__(self):
83 def __init__(self):
84 writers.Writer.__init__(self)
84 super(RhodeCodeWriter, self).__init__()
85 self.translator_class = CustomHTMLTranslator
85 self.translator_class = CustomHTMLTranslator
86
86
87
87
88 def relative_links(html_source, server_paths):
88 def relative_links(html_source, server_paths):
89 if not html_source:
89 if not html_source:
90 return html_source
90 return html_source
91
91
92 if not fromstring and tostring:
92 if not fromstring and tostring:
93 return html_source
93 return html_source
94
94
95 try:
95 try:
96 doc = lxml.html.fromstring(html_source)
96 doc = lxml.html.fromstring(html_source)
97 except Exception:
97 except Exception:
98 return html_source
98 return html_source
99
99
100 for el in doc.cssselect('img, video'):
100 for el in doc.cssselect('img, video'):
101 src = el.attrib.get('src')
101 src = el.attrib.get('src')
102 if src:
102 if src:
103 el.attrib['src'] = relative_path(src, server_paths['raw'])
103 el.attrib['src'] = relative_path(src, server_paths['raw'])
104
104
105 for el in doc.cssselect('a:not(.gfm)'):
105 for el in doc.cssselect('a:not(.gfm)'):
106 src = el.attrib.get('href')
106 src = el.attrib.get('href')
107 if src:
107 if src:
108 raw_mode = el.attrib['href'].endswith('?raw=1')
108 raw_mode = el.attrib['href'].endswith('?raw=1')
109 if raw_mode:
109 if raw_mode:
110 el.attrib['href'] = relative_path(src, server_paths['raw'])
110 el.attrib['href'] = relative_path(src, server_paths['raw'])
111 else:
111 else:
112 el.attrib['href'] = relative_path(src, server_paths['standard'])
112 el.attrib['href'] = relative_path(src, server_paths['standard'])
113
113
114 return lxml.html.tostring(doc)
114 return lxml.html.tostring(doc, encoding='unicode')
115
115
116
116
117 def relative_path(path, request_path, is_repo_file=None):
117 def relative_path(path, request_path, is_repo_file=None):
118 """
118 """
119 relative link support, path is a rel path, and request_path is current
119 relative link support, path is a rel path, and request_path is current
120 server path (not absolute)
120 server path (not absolute)
121
121
122 e.g.
122 e.g.
123
123
124 path = '../logo.png'
124 path = '../logo.png'
125 request_path= '/repo/files/path/file.md'
125 request_path= '/repo/files/path/file.md'
126 produces: '/repo/files/logo.png'
126 produces: '/repo/files/logo.png'
127 """
127 """
128 # TODO(marcink): unicode/str support ?
128 # TODO(marcink): unicode/str support ?
129 # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))
129 # maybe=> safe_str(urllib.quote(safe_str(final_path), '/:'))
130
130
131 def dummy_check(p):
131 def dummy_check(p):
132 return True # assume default is a valid file path
132 return True # assume default is a valid file path
133
133
134 is_repo_file = is_repo_file or dummy_check
134 is_repo_file = is_repo_file or dummy_check
135 if not path:
135 if not path:
136 return request_path
136 return request_path
137
137
138 path = safe_unicode(path)
138 path = safe_str(path)
139 request_path = safe_unicode(request_path)
139 request_path = safe_str(request_path)
140
140
141 if path.startswith(('data:', 'javascript:', '#', ':')):
141 if path.startswith(('data:', 'javascript:', '#', ':')):
142 # skip data, anchor, invalid links
142 # skip data, anchor, invalid links
143 return path
143 return path
144
144
145 is_absolute = bool(urllib.parse.urlparse(path).netloc)
145 is_absolute = bool(urllib.parse.urlparse(path).netloc)
146 if is_absolute:
146 if is_absolute:
147 return path
147 return path
148
148
149 if not request_path:
149 if not request_path:
150 return path
150 return path
151
151
152 if path.startswith('/'):
152 if path.startswith('/'):
153 path = path[1:]
153 path = path[1:]
154
154
155 if path.startswith('./'):
155 if path.startswith('./'):
156 path = path[2:]
156 path = path[2:]
157
157
158 parts = request_path.split('/')
158 parts = request_path.split('/')
159 # compute how deep we need to traverse the request_path
159 # compute how deep we need to traverse the request_path
160 depth = 0
160 depth = 0
161
161
162 if is_repo_file(request_path):
162 if is_repo_file(request_path):
163 # if request path is a VALID file, we use a relative path with
163 # if request path is a VALID file, we use a relative path with
164 # one level up
164 # one level up
165 depth += 1
165 depth += 1
166
166
167 while path.startswith('../'):
167 while path.startswith('../'):
168 depth += 1
168 depth += 1
169 path = path[3:]
169 path = path[3:]
170
170
171 if depth > 0:
171 if depth > 0:
172 parts = parts[:-depth]
172 parts = parts[:-depth]
173
173
174 parts.append(path)
174 parts.append(path)
175 final_path = '/'.join(parts).lstrip('/')
175 final_path = '/'.join(parts).lstrip('/')
176
176
177 return '/' + final_path
177 return '/' + final_path
178
178
179
179
180 _cached_markdown_renderer = None
180 _cached_markdown_renderer = None
181
181
182
182
183 def get_markdown_renderer(extensions, output_format):
183 def get_markdown_renderer(extensions, output_format):
184 global _cached_markdown_renderer
184 global _cached_markdown_renderer
185
185
186 if _cached_markdown_renderer is None:
186 if _cached_markdown_renderer is None:
187 _cached_markdown_renderer = markdown.Markdown(
187 _cached_markdown_renderer = markdown.Markdown(
188 extensions=extensions,
188 extensions=extensions + ['legacy_attrs'],
189 enable_attributes=False, output_format=output_format)
189 output_format=output_format)
190 return _cached_markdown_renderer
190 return _cached_markdown_renderer
191
191
192
192
193 _cached_markdown_renderer_flavored = None
193 def get_markdown_renderer_flavored(extensions, output_format):
194
194 """
195 Dummy wrapper that mimics the markdown API: parses GitHub-flavored markdown and renders it to HTML
195
196
196 def get_markdown_renderer_flavored(extensions, output_format):
197 """
197 global _cached_markdown_renderer_flavored
198 md = get_markdown_renderer(extensions, output_format)
198
199
199 if _cached_markdown_renderer_flavored is None:
200 class GFM(object):
200 _cached_markdown_renderer_flavored = markdown.Markdown(
201 def convert(self, source):
201 extensions=extensions + [GithubFlavoredMarkdownExtension()],
202 with pycmarkgfm.parse_gfm(source) as document:
202 enable_attributes=False, output_format=output_format)
203 parsed_md = document.to_commonmark()
203 return _cached_markdown_renderer_flavored
204 return md.convert(parsed_md)
205
206 return GFM()
204
207
205
208
206 class MarkupRenderer(object):
209 class MarkupRenderer(object):
207 RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
210 RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
208
211
209 MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
212 MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
210 RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
213 RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
211 JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
214 JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
212 PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)
215 PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)
213
216
214 URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
217 URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
215 r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
218 r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
216
219
217 MENTION_PAT = re.compile(MENTIONS_REGEX)
220 MENTION_PAT = re.compile(MENTIONS_REGEX)
218
221
219 extensions = ['markdown.extensions.codehilite', 'markdown.extensions.extra',
222 extensions = ['markdown.extensions.codehilite', 'markdown.extensions.extra',
220 'markdown.extensions.def_list', 'markdown.extensions.sane_lists']
223 'markdown.extensions.def_list', 'markdown.extensions.sane_lists']
221
224
222 output_format = 'html4'
225 output_format = 'html4'
223
226
224 # extension together with weights. Lower is first means we control how
227 # extension together with weights. Lower is first means we control how
225 # extensions are attached to readme names with those.
228 # extensions are attached to readme names with those.
226 PLAIN_EXTS = [
229 PLAIN_EXTS = [
227 # prefer no extension
230 # prefer no extension
228 ('', 0), # special case that renders READMES names without extension
231 ('', 0), # special case that renders READMES names without extension
229 ('.text', 2), ('.TEXT', 2),
232 ('.text', 2), ('.TEXT', 2),
230 ('.txt', 3), ('.TXT', 3)
233 ('.txt', 3), ('.TXT', 3)
231 ]
234 ]
232
235
233 RST_EXTS = [
236 RST_EXTS = [
234 ('.rst', 1), ('.rest', 1),
237 ('.rst', 1), ('.rest', 1),
235 ('.RST', 2), ('.REST', 2)
238 ('.RST', 2), ('.REST', 2)
236 ]
239 ]
237
240
238 MARKDOWN_EXTS = [
241 MARKDOWN_EXTS = [
239 ('.md', 1), ('.MD', 1),
242 ('.md', 1), ('.MD', 1),
240 ('.mkdn', 2), ('.MKDN', 2),
243 ('.mkdn', 2), ('.MKDN', 2),
241 ('.mdown', 3), ('.MDOWN', 3),
244 ('.mdown', 3), ('.MDOWN', 3),
242 ('.markdown', 4), ('.MARKDOWN', 4)
245 ('.markdown', 4), ('.MARKDOWN', 4)
243 ]
246 ]
244
247
245 def _detect_renderer(self, source, filename=None):
248 def _detect_renderer(self, source, filename=None):
246 """
249 """
247 runs detection of what renderer should be used for generating html
250 runs detection of what renderer should be used for generating html
248 from a markup language
251 from a markup language
249
252
250 filename can be also explicitly a renderer name
253 filename can be also explicitly a renderer name
251
254
252 :param source:
255 :param source:
253 :param filename:
256 :param filename:
254 """
257 """
255
258
256 if MarkupRenderer.MARKDOWN_PAT.findall(filename):
259 if MarkupRenderer.MARKDOWN_PAT.findall(filename):
257 detected_renderer = 'markdown'
260 detected_renderer = 'markdown'
258 elif MarkupRenderer.RST_PAT.findall(filename):
261 elif MarkupRenderer.RST_PAT.findall(filename):
259 detected_renderer = 'rst'
262 detected_renderer = 'rst'
260 elif MarkupRenderer.JUPYTER_PAT.findall(filename):
263 elif MarkupRenderer.JUPYTER_PAT.findall(filename):
261 detected_renderer = 'jupyter'
264 detected_renderer = 'jupyter'
262 elif MarkupRenderer.PLAIN_PAT.findall(filename):
265 elif MarkupRenderer.PLAIN_PAT.findall(filename):
263 detected_renderer = 'plain'
266 detected_renderer = 'plain'
264 else:
267 else:
265 detected_renderer = 'plain'
268 detected_renderer = 'plain'
266
269
267 return getattr(MarkupRenderer, detected_renderer)
270 return getattr(MarkupRenderer, detected_renderer)
268
271
269 @classmethod
272 @classmethod
270 def bleach_clean(cls, text):
273 def sanitize_html(cls, text):
274 # TODO: replace this with https://nh3.readthedocs.io/en/latest
275 # bleach is abandoned and deprecated :/
276
271 from .bleach_whitelist import markdown_attrs, markdown_tags
277 from .bleach_whitelist import markdown_attrs, markdown_tags
272 allowed_tags = markdown_tags
278 allowed_tags = markdown_tags
273 allowed_attrs = markdown_attrs
279 allowed_attrs = markdown_attrs
274
280
275 try:
281 try:
276 return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
282 return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)
277 except Exception:
283 except Exception:
278 return 'UNPARSEABLE TEXT'
284 return 'TEXT CANNOT BE PARSED USING SANITIZE'
279
285
280 @classmethod
286 @classmethod
281 def renderer_from_filename(cls, filename, exclude):
287 def renderer_from_filename(cls, filename, exclude):
282 """
288 """
283 Detect renderer markdown/rst from filename and optionally use exclude
289 Detect renderer markdown/rst from filename and optionally use exclude
284 list to remove some options. This is mostly used in helpers.
290 list to remove some options. This is mostly used in helpers.
285 Returns None when no renderer can be detected.
291 Returns None when no renderer can be detected.
286 """
292 """
287 def _filter(elements):
293 def _filter(elements):
288 if isinstance(exclude, (list, tuple)):
294 if isinstance(exclude, (list, tuple)):
289 return [x for x in elements if x not in exclude]
295 return [x for x in elements if x not in exclude]
290 return elements
296 return elements
291
297
292 if filename.endswith(
298 if filename.endswith(
293 tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):
299 tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):
294 return 'markdown'
300 return 'markdown'
295 if filename.endswith(tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):
301 if filename.endswith(tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):
296 return 'rst'
302 return 'rst'
297
303
298 return None
304 return None
299
305
300 def render(self, source, filename=None):
306 def render(self, source, filename=None):
301 """
307 """
302 Renders a given filename using detected renderer
308 Renders a given filename using detected renderer
303 it detects renderers based on file extension or mimetype.
309 it detects renderers based on file extension or mimetype.
304 At last it will just do a simple html replacing new lines with <br/>
310 At last it will just do a simple html replacing new lines with <br/>
305
306 :param file_name:
307 :param source:
308 """
311 """
309
312
310 renderer = self._detect_renderer(source, filename)
313 renderer = self._detect_renderer(source, filename)
311 readme_data = renderer(source)
314 readme_data = renderer(source)
312 return readme_data
315 return readme_data
313
316
314 @classmethod
317 @classmethod
315 def _flavored_markdown(cls, text):
316 """
317 Github style flavored markdown
318
319 :param text:
320 """
321
322 # Extract pre blocks.
323 extractions = {}
324
325 def pre_extraction_callback(matchobj):
326 digest = md5_safe(matchobj.group(0))
327 extractions[digest] = matchobj.group(0)
328 return "{gfm-extraction-%s}" % digest
329 pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
330 text = re.sub(pattern, pre_extraction_callback, text)
331
332 # Prevent foo_bar_baz from ending up with an italic word in the middle.
333 def italic_callback(matchobj):
334 s = matchobj.group(0)
335 if list(s).count('_') >= 2:
336 return s.replace('_', r'\_')
337 return s
338 text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
339
340 # Insert pre block extractions.
341 def pre_insert_callback(matchobj):
342 return '\n\n' + extractions[matchobj.group(1)]
343 text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
344 pre_insert_callback, text)
345
346 return text
347
348 @classmethod
349 def urlify_text(cls, text):
318 def urlify_text(cls, text):
350 def url_func(match_obj):
319 def url_func(match_obj):
351 url_full = match_obj.groups()[0]
320 url_full = match_obj.groups()[0]
352 return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
321 return f'<a href="{url_full}">{url_full}</a>'
353
322
354 return cls.URL_PAT.sub(url_func, text)
323 return cls.URL_PAT.sub(url_func, text)
355
324
356 @classmethod
325 @classmethod
357 def convert_mentions(cls, text, mode):
326 def convert_mentions(cls, text, mode):
358 mention_pat = cls.MENTION_PAT
327 mention_pat = cls.MENTION_PAT
359
328
360 def wrapp(match_obj):
329 def wrapp(match_obj):
361 uname = match_obj.groups()[0]
330 uname = match_obj.groups()[0]
362 hovercard_url = "pyroutes.url('hovercard_username', {'username': '%s'});" % uname
331 hovercard_url = "pyroutes.url('hovercard_username', {'username': '%s'});" % uname
363
332
364 if mode == 'markdown':
333 if mode == 'markdown':
365 tmpl = '<strong class="tooltip-hovercard" data-hovercard-alt="{uname}" data-hovercard-url="{hovercard_url}">@{uname}</strong>'
334 tmpl = '<strong class="tooltip-hovercard" data-hovercard-alt="{uname}" data-hovercard-url="{hovercard_url}">@{uname}</strong>'
366 elif mode == 'rst':
335 elif mode == 'rst':
367 tmpl = ' **@{uname}** '
336 tmpl = ' **@{uname}** '
368 else:
337 else:
369 raise ValueError('mode must be rst or markdown')
338 raise ValueError('mode must be rst or markdown')
370
339
371 return tmpl.format(**{'uname': uname,
340 return tmpl.format(**{'uname': uname,
372 'hovercard_url': hovercard_url})
341 'hovercard_url': hovercard_url})
373
342
374 return mention_pat.sub(wrapp, text).strip()
343 return mention_pat.sub(wrapp, text).strip()
375
344
@classmethod
def plain(cls, source, universal_newline=True, leading_newline=True):
    """
    Render `source` as plain text turned into HTML: URLs become links and
    newlines become ``<br />`` tags.

    :param source: text to render; converted with `safe_str` first
    :param universal_newline: when True, line endings are normalized to
        ``\\n`` by splitting into lines and re-joining
    :param leading_newline: when True, the output starts with ``<br />``
    :returns: the result of `sanitize_html` on the generated markup
    """
    text = safe_str(source)
    if universal_newline:
        text = '\n'.join(text.splitlines())

    linked = cls.urlify_text(text)
    prefix = '<br />' if leading_newline else ''
    markup = prefix + linked.replace("\n", '<br />')

    return cls.sanitize_html(markup)
391
360
@classmethod
def markdown(cls, source, safe=True, flavored=True, mentions=False,
             clean_html=True):
    """
    Render `source` as Markdown and return the resulting HTML.

    :param source: Markdown text to render
    :param safe: when True, a rendering error is logged and `source` is
        rendered with `plain` instead; when False the error is re-raised
    :param flavored: when True use the flavored renderer, otherwise the
        regular one
    :param mentions: when True, mentions are first converted with
        `convert_mentions` and the result is rendered
    :param clean_html: when True, the rendered output is passed through
        `sanitize_html`
    :returns: rendered HTML
    """
    if mentions:
        mention_hl = cls.convert_mentions(source, mode='markdown')
        # mentions are converted already, so render the result with
        # mentions=False; `clean_html` is forwarded so the caller's choice
        # is not silently replaced by the default
        return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                            mentions=False, clean_html=clean_html)

    # the renderer is only needed by the call that actually converts
    if flavored:
        markdown_renderer = get_markdown_renderer_flavored(
            cls.extensions, cls.output_format)
    else:
        markdown_renderer = get_markdown_renderer(
            cls.extensions, cls.output_format)

    try:
        rendered = markdown_renderer.convert(source)
    except Exception:
        log.exception('Error when rendering Markdown')
        if not safe:
            raise
        log.debug('Fallback to render in plain mode')
        rendered = cls.plain(source)

    if clean_html:
        rendered = cls.sanitize_html(rendered)
    return rendered
429
395
@classmethod
def rst(cls, source, safe=True, mentions=False, clean_html=False):
    """
    Render `source` as reStructuredText and return the resulting HTML.

    :param source: RST text to render; converted with `safe_str` first
    :param safe: when True, a rendering error is logged and `source` is
        rendered with `plain` instead; when False the error is re-raised
    :param mentions: when True, mentions are first converted with
        `convert_mentions` and the result is rendered
    :param clean_html: when True, the rendered fragment is passed through
        `sanitize_html`
    :returns: the document's ``html_title`` part followed by the rendered
        fragment
    """
    if mentions:
        mention_hl = cls.convert_mentions(source, mode='rst')
        # mentions are converted already, so render the result with
        # mentions=False; `clean_html` is forwarded, otherwise a caller
        # asking for sanitized output would get the unsanitized default
        return cls.rst(mention_hl, safe=safe, mentions=False,
                       clean_html=clean_html)

    source = safe_str(source)
    try:
        # every disallowed directive alias is mapped to None
        docutils_settings = dict.fromkeys(
            cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES)

        docutils_settings.update({
            'input_encoding': 'unicode',
            'report_level': 4,
            'syntax_highlight': 'short',
        })

        # NOTE(review): this loop also iterates the three settings keys
        # added just above, so they get registered as directives too -
        # confirm whether only the disallowed aliases were intended.
        for k, v in docutils_settings.items():
            directives.register_directive(k, v)

        parts = publish_parts(source=source,
                              writer=RhodeCodeWriter(),
                              settings_overrides=docutils_settings)
        rendered = parts["fragment"]
        if clean_html:
            rendered = cls.sanitize_html(rendered)
        return parts['html_title'] + rendered
    except Exception:
        log.exception('Error when rendering RST')
        if not safe:
            raise
        log.debug('Fallback to render in plain mode')
        return cls.plain(source)
466
433
@classmethod
def jupyter(cls, source, safe=True):
    """
    Render a Jupyter notebook, given as its serialized `source`, to HTML.

    The notebook is read with ``nbformat`` (as version 4) and exported
    with an ``HTMLExporter`` subclass using the ``basic`` template. The
    returned body is prefixed with a marker comment, the ipython
    stylesheet link and the MathJax configuration/script tags.

    :param source: serialized notebook content
    :param safe: when True, javascript outputs are replaced by a
        placeholder text and markdown cell sources are passed through
        `sanitize_html` before export
    :returns: the rendered HTML body
    """
    from rhodecode.lib import helpers

    from traitlets.config import Config
    import nbformat
    from nbconvert import HTMLExporter
    from nbconvert.preprocessors import Preprocessor

    class CustomHTMLExporter(HTMLExporter):
        def _template_file_default(self):
            return 'basic'

    class Sandbox(Preprocessor):

        def preprocess(self, nb, resources):
            sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
            for cell in nb['cells']:
                # cells are left untouched when rendering in unsafe mode
                if not safe:
                    continue

                for cell_output in cell.get('outputs', ()):
                    if 'data' not in cell_output:
                        continue
                    output_data = cell_output['data']
                    if 'application/javascript' in output_data:
                        # swap the javascript payload for a placeholder
                        output_data['text/plain'] = sandbox_text
                        output_data.pop('application/javascript', None)

                if 'source' in cell and cell['cell_type'] == 'markdown':
                    # sanitize similar like in markdown
                    cell['source'] = cls.sanitize_html(cell['source'])

            return nb, resources

    # not called anywhere inside this method
    def _sanitize_resources(input_resources):
        """
        Skip/sanitize some of the CSS generated and included in jupyter
        so it doesn't mess up the UI so much
        """

        # TODO(marcink): probably we should replace this with whole custom
        # CSS set that doesn't screw up, but jupyter generated html has some
        # special markers, so it requires Custom HTML exporter template with
        # _default_template_path_default, to achieve that

        # strip the reset CSS
        first_css = input_resources[0]
        input_resources[0] = first_css[first_css.find('/*! Source'):]
        return input_resources

    def as_html(notebook):
        conf = Config()
        conf.CustomHTMLExporter.preprocessors = [Sandbox]
        html_exporter = CustomHTMLExporter(config=conf)

        (body, resources) = html_exporter.from_notebook_node(notebook)
        header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
        js = MakoTemplate(r'''
        <!-- MathJax configuration -->
        <script type="text/x-mathjax-config">
        MathJax.Hub.Config({
            jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
            extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
            TeX: {
                extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
            },
            tex2jax: {
                inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                processEscapes: true,
                processEnvironments: true
            },
            // Center justify equations in code and markdown cells. Elsewhere
            // we use CSS to left justify single line equations in code cells.
            displayAlign: 'center',
            "HTML-CSS": {
                styles: {'.MathJax_Display': {"margin": 0}},
                linebreaks: { automatic: true },
                availableFonts: ["STIX", "TeX"]
            },
            showMathMenu: false
        });
        </script>
        <!-- End of MathJax configuration -->
        <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
        ''').render(h=helpers)

        css = MakoTemplate(r'''
        <link rel="stylesheet" type="text/css" href="${h.asset('css/style-ipython.css', ver=ver)}" media="screen"/>
        ''').render(h=helpers, ver='ver1')

        body = '\n'.join([header, css, js, body])
        return body, resources

    parsed_notebook = nbformat.reads(source, as_version=4)
    html_body, _resources = as_html(parsed_notebook)
    return html_body
563
530
564
531
class RstTemplateRenderer:
    """
    Renders templates looked up in the ``templates/rst_templates``
    directory located one level above this module's directory.
    """

    def __init__(self):
        module_dir = os.path.dirname(__file__)
        base = os.path.abspath(os.path.dirname(module_dir))
        templates_dir = os.path.join(base, 'templates', 'rst_templates')
        # `h` is made available inside every template through `imports`
        self.template_store = TemplateLookup(
            directories=[templates_dir],
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """Return the template named `templatename` from the lookup."""
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """Render `template_name`, passing `kwargs` to the template."""
        return self._get_template(template_name).render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now