##// END OF EJS Templates
markup-renderer: use global Markdown object to speed up markdown rendering.
marcink -
r1353:7b18aa9b default
parent child Browse files
Show More
@@ -1,273 +1,282 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2
2
3 # Copyright (C) 2011-2017 RhodeCode GmbH
3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 #
4 #
5 # This program is free software: you can redistribute it and/or modify
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License, version 3
6 # it under the terms of the GNU Affero General Public License, version 3
7 # (only), as published by the Free Software Foundation.
7 # (only), as published by the Free Software Foundation.
8 #
8 #
9 # This program is distributed in the hope that it will be useful,
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
12 # GNU General Public License for more details.
13 #
13 #
14 # You should have received a copy of the GNU Affero General Public License
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #
16 #
17 # This program is dual-licensed. If you wish to learn more about the
17 # This program is dual-licensed. If you wish to learn more about the
18 # RhodeCode Enterprise Edition, including its added features, Support services,
18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20
20
21
21
22 """
22 """
23 Renderer for markup languages with ability to parse using rst or markdown
23 Renderer for markup languages with ability to parse using rst or markdown
24 """
24 """
25
25
26 import re
26 import re
27 import os
27 import os
28 import logging
28 import logging
29 import itertools
29 import itertools
30
30
31 from mako.lookup import TemplateLookup
31 from mako.lookup import TemplateLookup
32
32
33 from docutils.core import publish_parts
33 from docutils.core import publish_parts
34 from docutils.parsers.rst import directives
34 from docutils.parsers.rst import directives
35 import markdown
35 import markdown
36
36
37 from rhodecode.lib.markdown_ext import (
37 from rhodecode.lib.markdown_ext import (
38 UrlizeExtension, GithubFlavoredMarkdownExtension)
38 UrlizeExtension, GithubFlavoredMarkdownExtension)
39 from rhodecode.lib.utils2 import safe_unicode, md5_safe, MENTIONS_REGEX
39 from rhodecode.lib.utils2 import safe_unicode, md5_safe, MENTIONS_REGEX
40
40
41 log = logging.getLogger(__name__)
41 log = logging.getLogger(__name__)
42
42
43 # default renderer used to generate automated comments
43 # default renderer used to generate automated comments
44 DEFAULT_COMMENTS_RENDERER = 'rst'
44 DEFAULT_COMMENTS_RENDERER = 'rst'
45
45
46
46
47 class MarkupRenderer(object):
47 class MarkupRenderer(object):
48 RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
48 RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
49
49
50 MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
50 MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
51 RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
51 RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
52 PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)
52 PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)
53
53
54 extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
55 markdown_renderer = markdown.Markdown(
56 extensions, safe_mode=True, enable_attributes=False)
57
58 markdown_renderer_flavored = markdown.Markdown(
59 extensions + [GithubFlavoredMarkdownExtension()], safe_mode=True,
60 enable_attributes=False)
61
54 # extension together with weights. Lower is first means we control how
62 # extension together with weights. Lower is first means we control how
55 # extensions are attached to readme names with those.
63 # extensions are attached to readme names with those.
56 PLAIN_EXTS = [
64 PLAIN_EXTS = [
57 # prefer no extension
65 # prefer no extension
58 ('', 0), # special case that renders READMES names without extension
66 ('', 0), # special case that renders READMES names without extension
59 ('.text', 2), ('.TEXT', 2),
67 ('.text', 2), ('.TEXT', 2),
60 ('.txt', 3), ('.TXT', 3)
68 ('.txt', 3), ('.TXT', 3)
61 ]
69 ]
62
70
63 RST_EXTS = [
71 RST_EXTS = [
64 ('.rst', 1), ('.rest', 1),
72 ('.rst', 1), ('.rest', 1),
65 ('.RST', 2), ('.REST', 2)
73 ('.RST', 2), ('.REST', 2)
66 ]
74 ]
67
75
68 MARKDOWN_EXTS = [
76 MARKDOWN_EXTS = [
69 ('.md', 1), ('.MD', 1),
77 ('.md', 1), ('.MD', 1),
70 ('.mkdn', 2), ('.MKDN', 2),
78 ('.mkdn', 2), ('.MKDN', 2),
71 ('.mdown', 3), ('.MDOWN', 3),
79 ('.mdown', 3), ('.MDOWN', 3),
72 ('.markdown', 4), ('.MARKDOWN', 4)
80 ('.markdown', 4), ('.MARKDOWN', 4)
73 ]
81 ]
74
82
75 def _detect_renderer(self, source, filename=None):
83 def _detect_renderer(self, source, filename=None):
76 """
84 """
77 runs detection of what renderer should be used for generating html
85 runs detection of what renderer should be used for generating html
78 from a markup language
86 from a markup language
79
87
80 filename can be also explicitly a renderer name
88 filename can be also explicitly a renderer name
81
89
82 :param source:
90 :param source:
83 :param filename:
91 :param filename:
84 """
92 """
85
93
86 if MarkupRenderer.MARKDOWN_PAT.findall(filename):
94 if MarkupRenderer.MARKDOWN_PAT.findall(filename):
87 detected_renderer = 'markdown'
95 detected_renderer = 'markdown'
88 elif MarkupRenderer.RST_PAT.findall(filename):
96 elif MarkupRenderer.RST_PAT.findall(filename):
89 detected_renderer = 'rst'
97 detected_renderer = 'rst'
90 elif MarkupRenderer.PLAIN_PAT.findall(filename):
98 elif MarkupRenderer.PLAIN_PAT.findall(filename):
91 detected_renderer = 'plain'
99 detected_renderer = 'plain'
92 else:
100 else:
93 detected_renderer = 'plain'
101 detected_renderer = 'plain'
94
102
95 return getattr(MarkupRenderer, detected_renderer)
103 return getattr(MarkupRenderer, detected_renderer)
96
104
97 @classmethod
105 @classmethod
98 def renderer_from_filename(cls, filename, exclude):
106 def renderer_from_filename(cls, filename, exclude):
99 """
107 """
100 Detect renderer markdown/rst from filename and optionally use exclude
108 Detect renderer markdown/rst from filename and optionally use exclude
101 list to remove some options. This is mostly used in helpers.
109 list to remove some options. This is mostly used in helpers.
102 Returns None when no renderer can be detected.
110 Returns None when no renderer can be detected.
103 """
111 """
104 def _filter(elements):
112 def _filter(elements):
105 if isinstance(exclude, (list, tuple)):
113 if isinstance(exclude, (list, tuple)):
106 return [x for x in elements if x not in exclude]
114 return [x for x in elements if x not in exclude]
107 return elements
115 return elements
108
116
109 if filename.endswith(
117 if filename.endswith(
110 tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):
118 tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):
111 return 'markdown'
119 return 'markdown'
112 if filename.endswith(tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):
120 if filename.endswith(tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):
113 return 'rst'
121 return 'rst'
114
122
115 return None
123 return None
116
124
117 def render(self, source, filename=None):
125 def render(self, source, filename=None):
118 """
126 """
119 Renders a given filename using detected renderer
127 Renders a given filename using detected renderer
120 it detects renderers based on file extension or mimetype.
128 it detects renderers based on file extension or mimetype.
121 At last it will just do a simple html replacing new lines with <br/>
129 At last it will just do a simple html replacing new lines with <br/>
122
130
123 :param file_name:
131 :param file_name:
124 :param source:
132 :param source:
125 """
133 """
126
134
127 renderer = self._detect_renderer(source, filename)
135 renderer = self._detect_renderer(source, filename)
128 readme_data = renderer(source)
136 readme_data = renderer(source)
129 return readme_data
137 return readme_data
130
138
131 @classmethod
139 @classmethod
132 def _flavored_markdown(cls, text):
140 def _flavored_markdown(cls, text):
133 """
141 """
134 Github style flavored markdown
142 Github style flavored markdown
135
143
136 :param text:
144 :param text:
137 """
145 """
138
146
139 # Extract pre blocks.
147 # Extract pre blocks.
140 extractions = {}
148 extractions = {}
141
149
142 def pre_extraction_callback(matchobj):
150 def pre_extraction_callback(matchobj):
143 digest = md5_safe(matchobj.group(0))
151 digest = md5_safe(matchobj.group(0))
144 extractions[digest] = matchobj.group(0)
152 extractions[digest] = matchobj.group(0)
145 return "{gfm-extraction-%s}" % digest
153 return "{gfm-extraction-%s}" % digest
146 pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
154 pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
147 text = re.sub(pattern, pre_extraction_callback, text)
155 text = re.sub(pattern, pre_extraction_callback, text)
148
156
149 # Prevent foo_bar_baz from ending up with an italic word in the middle.
157 # Prevent foo_bar_baz from ending up with an italic word in the middle.
150 def italic_callback(matchobj):
158 def italic_callback(matchobj):
151 s = matchobj.group(0)
159 s = matchobj.group(0)
152 if list(s).count('_') >= 2:
160 if list(s).count('_') >= 2:
153 return s.replace('_', r'\_')
161 return s.replace('_', r'\_')
154 return s
162 return s
155 text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
163 text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
156
164
157 # Insert pre block extractions.
165 # Insert pre block extractions.
158 def pre_insert_callback(matchobj):
166 def pre_insert_callback(matchobj):
159 return '\n\n' + extractions[matchobj.group(1)]
167 return '\n\n' + extractions[matchobj.group(1)]
160 text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
168 text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
161 pre_insert_callback, text)
169 pre_insert_callback, text)
162
170
163 return text
171 return text
164
172
165 @classmethod
173 @classmethod
166 def urlify_text(cls, text):
174 def urlify_text(cls, text):
167 url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
175 url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
168 r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
176 r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
169
177
170 def url_func(match_obj):
178 def url_func(match_obj):
171 url_full = match_obj.groups()[0]
179 url_full = match_obj.groups()[0]
172 return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
180 return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
173
181
174 return url_pat.sub(url_func, text)
182 return url_pat.sub(url_func, text)
175
183
176 @classmethod
184 @classmethod
177 def plain(cls, source, universal_newline=True):
185 def plain(cls, source, universal_newline=True):
178 source = safe_unicode(source)
186 source = safe_unicode(source)
179 if universal_newline:
187 if universal_newline:
180 newline = '\n'
188 newline = '\n'
181 source = newline.join(source.splitlines())
189 source = newline.join(source.splitlines())
182
190
183 source = cls.urlify_text(source)
191 source = cls.urlify_text(source)
184 return '<br />' + source.replace("\n", '<br />')
192 return '<br />' + source.replace("\n", '<br />')
185
193
186 @classmethod
194 @classmethod
187 def markdown(cls, source, safe=True, flavored=True, mentions=False):
195 def markdown(cls, source, safe=True, flavored=True, mentions=False):
188 # It does not allow to insert inline HTML. In presence of HTML tags, it
196 # It does not allow to insert inline HTML. In presence of HTML tags, it
189 # will replace them instead with [HTML_REMOVED]. This is controlled by
197 # will replace them instead with [HTML_REMOVED]. This is controlled by
190 # the safe_mode=True parameter of the markdown method.
198 # the safe_mode=True parameter of the markdown method.
191 extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
199
192 if flavored:
200 if flavored:
193 extensions.append(GithubFlavoredMarkdownExtension())
201 markdown_renderer = cls.markdown_renderer_flavored
202 else:
203 markdown_renderer = cls.markdown_renderer
194
204
195 if mentions:
205 if mentions:
196 mention_pat = re.compile(MENTIONS_REGEX)
206 mention_pat = re.compile(MENTIONS_REGEX)
197
207
198 def wrapp(match_obj):
208 def wrapp(match_obj):
199 uname = match_obj.groups()[0]
209 uname = match_obj.groups()[0]
200 return ' **@%(uname)s** ' % {'uname': uname}
210 return ' **@%(uname)s** ' % {'uname': uname}
201 mention_hl = mention_pat.sub(wrapp, source).strip()
211 mention_hl = mention_pat.sub(wrapp, source).strip()
202 # we extracted mentions render with this using Mentions false
212 # we extracted mentions render with this using Mentions false
203 return cls.markdown(mention_hl, safe=safe, flavored=flavored,
213 return cls.markdown(mention_hl, safe=safe, flavored=flavored,
204 mentions=False)
214 mentions=False)
205
215
206 source = safe_unicode(source)
216 source = safe_unicode(source)
207 try:
217 try:
208 if flavored:
218 if flavored:
209 source = cls._flavored_markdown(source)
219 source = cls._flavored_markdown(source)
210 return markdown.markdown(
220 return markdown_renderer.convert(source)
211 source, extensions, safe_mode=True, enable_attributes=False)
212 except Exception:
221 except Exception:
213 log.exception('Error when rendering Markdown')
222 log.exception('Error when rendering Markdown')
214 if safe:
223 if safe:
215 log.debug('Fallback to render in plain mode')
224 log.debug('Fallback to render in plain mode')
216 return cls.plain(source)
225 return cls.plain(source)
217 else:
226 else:
218 raise
227 raise
219
228
220 @classmethod
229 @classmethod
221 def rst(cls, source, safe=True, mentions=False):
230 def rst(cls, source, safe=True, mentions=False):
222 if mentions:
231 if mentions:
223 mention_pat = re.compile(MENTIONS_REGEX)
232 mention_pat = re.compile(MENTIONS_REGEX)
224
233
225 def wrapp(match_obj):
234 def wrapp(match_obj):
226 uname = match_obj.groups()[0]
235 uname = match_obj.groups()[0]
227 return ' **@%(uname)s** ' % {'uname': uname}
236 return ' **@%(uname)s** ' % {'uname': uname}
228 mention_hl = mention_pat.sub(wrapp, source).strip()
237 mention_hl = mention_pat.sub(wrapp, source).strip()
229 # we extracted mentions render with this using Mentions false
238 # we extracted mentions render with this using Mentions false
230 return cls.rst(mention_hl, safe=safe, mentions=False)
239 return cls.rst(mention_hl, safe=safe, mentions=False)
231
240
232 source = safe_unicode(source)
241 source = safe_unicode(source)
233 try:
242 try:
234 docutils_settings = dict(
243 docutils_settings = dict(
235 [(alias, None) for alias in
244 [(alias, None) for alias in
236 cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])
245 cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])
237
246
238 docutils_settings.update({'input_encoding': 'unicode',
247 docutils_settings.update({'input_encoding': 'unicode',
239 'report_level': 4})
248 'report_level': 4})
240
249
241 for k, v in docutils_settings.iteritems():
250 for k, v in docutils_settings.iteritems():
242 directives.register_directive(k, v)
251 directives.register_directive(k, v)
243
252
244 parts = publish_parts(source=source,
253 parts = publish_parts(source=source,
245 writer_name="html4css1",
254 writer_name="html4css1",
246 settings_overrides=docutils_settings)
255 settings_overrides=docutils_settings)
247
256
248 return parts['html_title'] + parts["fragment"]
257 return parts['html_title'] + parts["fragment"]
249 except Exception:
258 except Exception:
250 log.exception('Error when rendering RST')
259 log.exception('Error when rendering RST')
251 if safe:
260 if safe:
252 log.debug('Fallbacking to render in plain mode')
261 log.debug('Fallbacking to render in plain mode')
253 return cls.plain(source)
262 return cls.plain(source)
254 else:
263 else:
255 raise
264 raise
256
265
257
266
258 class RstTemplateRenderer(object):
267 class RstTemplateRenderer(object):
259
268
260 def __init__(self):
269 def __init__(self):
261 base = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
270 base = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
262 rst_template_dirs = [os.path.join(base, 'templates', 'rst_templates')]
271 rst_template_dirs = [os.path.join(base, 'templates', 'rst_templates')]
263 self.template_store = TemplateLookup(
272 self.template_store = TemplateLookup(
264 directories=rst_template_dirs,
273 directories=rst_template_dirs,
265 input_encoding='utf-8',
274 input_encoding='utf-8',
266 imports=['from rhodecode.lib import helpers as h'])
275 imports=['from rhodecode.lib import helpers as h'])
267
276
268 def _get_template(self, templatename):
277 def _get_template(self, templatename):
269 return self.template_store.get_template(templatename)
278 return self.template_store.get_template(templatename)
270
279
271 def render(self, template_name, **kwargs):
280 def render(self, template_name, **kwargs):
272 template = self._get_template(template_name)
281 template = self._get_template(template_name)
273 return template.render(**kwargs)
282 return template.render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now