##// END OF EJS Templates
markup-renderer: use global Markdown object to speed up markdown rendering.
marcink -
r1353:7b18aa9b default
parent child Browse files
Show More
@@ -1,273 +1,282 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Renderer for markup languages with ability to parse using rst or markdown
24 24 """
25 25
26 26 import re
27 27 import os
28 28 import logging
29 29 import itertools
30 30
31 31 from mako.lookup import TemplateLookup
32 32
33 33 from docutils.core import publish_parts
34 34 from docutils.parsers.rst import directives
35 35 import markdown
36 36
37 37 from rhodecode.lib.markdown_ext import (
38 38 UrlizeExtension, GithubFlavoredMarkdownExtension)
39 39 from rhodecode.lib.utils2 import safe_unicode, md5_safe, MENTIONS_REGEX
40 40
41 41 log = logging.getLogger(__name__)
42 42
43 43 # default renderer used to generate automated comments
44 44 DEFAULT_COMMENTS_RENDERER = 'rst'
45 45
46 46
47 47 class MarkupRenderer(object):
48 48 RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
49 49
50 50 MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
51 51 RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
52 52 PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)
53 53
54 extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
55 markdown_renderer = markdown.Markdown(
56 extensions, safe_mode=True, enable_attributes=False)
57
58 markdown_renderer_flavored = markdown.Markdown(
59 extensions + [GithubFlavoredMarkdownExtension()], safe_mode=True,
60 enable_attributes=False)
61
54 62 # Extensions paired with weights. A lower weight comes first, which lets us
55 63 # control the order in which extensions are attached to readme names.
56 64 PLAIN_EXTS = [
57 65 # prefer no extension
58 66 ('', 0), # special case that renders README names without an extension
59 67 ('.text', 2), ('.TEXT', 2),
60 68 ('.txt', 3), ('.TXT', 3)
61 69 ]
62 70
63 71 RST_EXTS = [
64 72 ('.rst', 1), ('.rest', 1),
65 73 ('.RST', 2), ('.REST', 2)
66 74 ]
67 75
68 76 MARKDOWN_EXTS = [
69 77 ('.md', 1), ('.MD', 1),
70 78 ('.mkdn', 2), ('.MKDN', 2),
71 79 ('.mdown', 3), ('.MDOWN', 3),
72 80 ('.markdown', 4), ('.MARKDOWN', 4)
73 81 ]
74 82
75 83 def _detect_renderer(self, source, filename=None):
76 84 """
77 85 runs detection of what renderer should be used for generating html
78 86 from a markup language
79 87
80 88 filename can also be explicitly a renderer name
81 89
82 90 :param source:
83 91 :param filename:
84 92 """
85 93
86 94 if MarkupRenderer.MARKDOWN_PAT.findall(filename):
87 95 detected_renderer = 'markdown'
88 96 elif MarkupRenderer.RST_PAT.findall(filename):
89 97 detected_renderer = 'rst'
90 98 elif MarkupRenderer.PLAIN_PAT.findall(filename):
91 99 detected_renderer = 'plain'
92 100 else:
93 101 detected_renderer = 'plain'
94 102
95 103 return getattr(MarkupRenderer, detected_renderer)
96 104
97 105 @classmethod
98 106 def renderer_from_filename(cls, filename, exclude):
99 107 """
100 108 Detect renderer markdown/rst from filename and optionally use exclude
101 109 list to remove some options. This is mostly used in helpers.
102 110 Returns None when no renderer can be detected.
103 111 """
104 112 def _filter(elements):
105 113 if isinstance(exclude, (list, tuple)):
106 114 return [x for x in elements if x not in exclude]
107 115 return elements
108 116
109 117 if filename.endswith(
110 118 tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):
111 119 return 'markdown'
112 120 if filename.endswith(tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):
113 121 return 'rst'
114 122
115 123 return None
116 124
117 125 def render(self, source, filename=None):
118 126 """
119 127 Renders the given source using the detected renderer.
120 128 Renderers are detected based on file extension or mimetype.
121 129 As a last resort it produces simple html, replacing new lines with <br/>
122 130
123 131 :param filename:
124 132 :param source:
125 133 """
126 134
127 135 renderer = self._detect_renderer(source, filename)
128 136 readme_data = renderer(source)
129 137 return readme_data
130 138
131 139 @classmethod
132 140 def _flavored_markdown(cls, text):
133 141 """
134 142 Github style flavored markdown
135 143
136 144 :param text:
137 145 """
138 146
139 147 # Extract pre blocks.
140 148 extractions = {}
141 149
142 150 def pre_extraction_callback(matchobj):
143 151 digest = md5_safe(matchobj.group(0))
144 152 extractions[digest] = matchobj.group(0)
145 153 return "{gfm-extraction-%s}" % digest
146 154 pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
147 155 text = re.sub(pattern, pre_extraction_callback, text)
148 156
149 157 # Prevent foo_bar_baz from ending up with an italic word in the middle.
150 158 def italic_callback(matchobj):
151 159 s = matchobj.group(0)
152 160 if list(s).count('_') >= 2:
153 161 return s.replace('_', r'\_')
154 162 return s
155 163 text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
156 164
157 165 # Insert pre block extractions.
158 166 def pre_insert_callback(matchobj):
159 167 return '\n\n' + extractions[matchobj.group(1)]
160 168 text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
161 169 pre_insert_callback, text)
162 170
163 171 return text
164 172
165 173 @classmethod
166 174 def urlify_text(cls, text):
167 175 url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
168 176 r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')
169 177
170 178 def url_func(match_obj):
171 179 url_full = match_obj.groups()[0]
172 180 return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
173 181
174 182 return url_pat.sub(url_func, text)
175 183
176 184 @classmethod
177 185 def plain(cls, source, universal_newline=True):
178 186 source = safe_unicode(source)
179 187 if universal_newline:
180 188 newline = '\n'
181 189 source = newline.join(source.splitlines())
182 190
183 191 source = cls.urlify_text(source)
184 192 return '<br />' + source.replace("\n", '<br />')
185 193
186 194 @classmethod
187 195 def markdown(cls, source, safe=True, flavored=True, mentions=False):
188 196 # Inserting inline HTML is not allowed. When HTML tags are present, they
189 197 # are replaced with [HTML_REMOVED]. This is controlled by the
190 198 # safe_mode=True parameter passed to the markdown renderer.
191 extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
199
192 200 if flavored:
193 extensions.append(GithubFlavoredMarkdownExtension())
201 markdown_renderer = cls.markdown_renderer_flavored
202 else:
203 markdown_renderer = cls.markdown_renderer
194 204
195 205 if mentions:
196 206 mention_pat = re.compile(MENTIONS_REGEX)
197 207
198 208 def wrapp(match_obj):
199 209 uname = match_obj.groups()[0]
200 210 return ' **@%(uname)s** ' % {'uname': uname}
201 211 mention_hl = mention_pat.sub(wrapp, source).strip()
202 213 # mentions are already highlighted; render the result with mentions=False
203 213 return cls.markdown(mention_hl, safe=safe, flavored=flavored,
204 214 mentions=False)
205 215
206 216 source = safe_unicode(source)
207 217 try:
208 218 if flavored:
209 219 source = cls._flavored_markdown(source)
210 return markdown.markdown(
211 source, extensions, safe_mode=True, enable_attributes=False)
220 return markdown_renderer.convert(source)
212 221 except Exception:
213 222 log.exception('Error when rendering Markdown')
214 223 if safe:
215 224 log.debug('Fallback to render in plain mode')
216 225 return cls.plain(source)
217 226 else:
218 227 raise
219 228
220 229 @classmethod
221 230 def rst(cls, source, safe=True, mentions=False):
222 231 if mentions:
223 232 mention_pat = re.compile(MENTIONS_REGEX)
224 233
225 234 def wrapp(match_obj):
226 235 uname = match_obj.groups()[0]
227 236 return ' **@%(uname)s** ' % {'uname': uname}
228 237 mention_hl = mention_pat.sub(wrapp, source).strip()
229 238 # mentions are already highlighted; render the result with mentions=False
230 239 return cls.rst(mention_hl, safe=safe, mentions=False)
231 240
232 241 source = safe_unicode(source)
233 242 try:
234 243 docutils_settings = dict(
235 244 [(alias, None) for alias in
236 245 cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])
237 246
238 247 docutils_settings.update({'input_encoding': 'unicode',
239 248 'report_level': 4})
240 249
241 250 for k, v in docutils_settings.iteritems():
242 251 directives.register_directive(k, v)
243 252
244 253 parts = publish_parts(source=source,
245 254 writer_name="html4css1",
246 255 settings_overrides=docutils_settings)
247 256
248 257 return parts['html_title'] + parts["fragment"]
249 258 except Exception:
250 259 log.exception('Error when rendering RST')
251 260 if safe:
252 261 log.debug('Fallbacking to render in plain mode')
253 262 return cls.plain(source)
254 263 else:
255 264 raise
256 265
257 266
258 267 class RstTemplateRenderer(object):
259 268
260 269 def __init__(self):
261 270 base = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
262 271 rst_template_dirs = [os.path.join(base, 'templates', 'rst_templates')]
263 272 self.template_store = TemplateLookup(
264 273 directories=rst_template_dirs,
265 274 input_encoding='utf-8',
266 275 imports=['from rhodecode.lib import helpers as h'])
267 276
268 277 def _get_template(self, templatename):
269 278 return self.template_store.get_template(templatename)
270 279
271 280 def render(self, template_name, **kwargs):
272 281 template = self._get_template(template_name)
273 282 return template.render(**kwargs)
General Comments 0
You need to be logged in to leave comments. Login now