##// END OF EJS Templates
renderer: don't render plaintext files as RST
marcink -
r1289:e8c231d6 default
parent child Browse files
Show More
@@ -1,273 +1,273 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2011-2017 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21
22 22 """
23 23 Renderer for markup languages with ability to parse using rst or markdown
24 24 """
25 25
26 26 import re
27 27 import os
28 28 import logging
29 29 import itertools
30 30
31 31 from mako.lookup import TemplateLookup
32 32
33 33 from docutils.core import publish_parts
34 34 from docutils.parsers.rst import directives
35 35 import markdown
36 36
37 37 from rhodecode.lib.markdown_ext import (
38 38 UrlizeExtension, GithubFlavoredMarkdownExtension)
39 39 from rhodecode.lib.utils2 import safe_unicode, md5_safe, MENTIONS_REGEX
40 40
41 41 log = logging.getLogger(__name__)
42 42
43 43 # default renderer used to generate automated comments
44 44 DEFAULT_COMMENTS_RENDERER = 'rst'
45 45
46 46
class MarkupRenderer(object):
    """
    Renders markup source (Markdown, reStructuredText or plain text) to HTML.

    The renderer is selected from the file name. Anything that is not
    recognised as Markdown or RST, including extension-less README files,
    is rendered as plain text.
    """
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    # Extensions together with weights; a lower weight comes first. The
    # weights control how extensions are attached to readme names.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        Pick the renderer used to generate HTML for a markup file.

        Detection looks at the file name only; `source` is accepted but
        not inspected.

        :param source: markup source (unused by the detection)
        :param filename: name of the file being rendered, may be None
        :returns: the `markdown`, `rst` or `plain` classmethod
        """
        # `filename` defaults to None and regex matching on None raises
        # TypeError, so a missing name is treated like an unknown one.
        filename = filename or ''

        if MarkupRenderer.MARKDOWN_PAT.search(filename):
            detected_renderer = 'markdown'
        elif MarkupRenderer.RST_PAT.search(filename):
            detected_renderer = 'rst'
        else:
            # extension-less READMEs (PLAIN_PAT) and every other name are
            # rendered as plain text, never as RST
            detected_renderer = 'plain'

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.

        :param filename: file name to check
        :param exclude: list or tuple of extensions to ignore; any other
            value (e.g. None) disables the filtering
        :returns: 'markdown', 'rst', or None when no renderer can be detected
        """
        def _suffixes(weighted_exts):
            # drop the weights and the empty "no extension" entry
            names = [ext for ext, _weight in weighted_exts if ext]
            if isinstance(exclude, (list, tuple)):
                names = [ext for ext in names if ext not in exclude]
            return tuple(names)

        if filename.endswith(_suffixes(cls.MARKDOWN_EXTS)):
            return 'markdown'
        if filename.endswith(_suffixes(cls.RST_EXTS)):
            return 'rst'

        return None

    def render(self, source, filename=None):
        """
        Render `source` with the renderer detected from `filename`.

        When the name is missing or not recognised the source is rendered
        as plain text: URLs become links and new lines become <br />.

        :param source: markup source to render
        :param filename: name of the file the source comes from
        :returns: rendered HTML
        """
        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown

        :param text: markdown source
        :returns: pre-processed markdown source
        """

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            digest = md5_safe(matchobj.group(0))
            extractions[digest] = matchobj.group(0)
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        # NOTE(review): the pattern is anchored with `^` and is not compiled
        # with re.MULTILINE, so it only applies at the very start of `text`.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            return '\n\n' + extractions[matchobj.group(1)]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def urlify_text(cls, text):
        """
        Wrap every http(s) URL found in `text` in an <a> tag.

        :param text: text to process
        :returns: text with the URLs replaced by HTML links
        """
        # NOTE(review): the matched URL is interpolated into the markup
        # without HTML escaping - confirm that callers sanitize the input.
        url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                             r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

        def url_func(match_obj):
            url_full = match_obj.groups()[0]
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

        return url_pat.sub(url_func, text)

    @classmethod
    def plain(cls, source, universal_newline=True):
        """
        Render `source` as plain text: URLs become links, new lines become
        <br /> and the result is prefixed with a <br />.

        :param source: text to render
        :param universal_newline: normalize all line endings to '\\n' first
        :returns: rendered HTML
        """
        # NOTE(review): `source` itself is not HTML-escaped here - confirm
        # that callers sanitize untrusted content.
        source = safe_unicode(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        source = cls.urlify_text(source)
        return '<br />' + source.replace("\n", '<br />')

    @classmethod
    def _highlight_mentions(cls, source):
        """
        Wrap every match of MENTIONS_REGEX in `source` in bold markers.

        :param source: markup source
        :returns: stripped source with the mentions emphasised
        """
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}
        return mention_pat.sub(wrapp, source).strip()

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False):
        """
        Render Markdown `source` to HTML.

        :param source: markdown source
        :param safe: on a rendering error fall back to plain rendering
            instead of re-raising
        :param flavored: apply the Github flavored pre-processing/extension
        :param mentions: emphasise @mentions before rendering
        :returns: rendered HTML
        """
        # It does not allow to insert inline HTML. In presence of HTML tags, it
        # will replace them instead with [HTML_REMOVED]. This is controlled by
        # the safe_mode=True parameter of the markdown method.
        extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
        if flavored:
            extensions.append(GithubFlavoredMarkdownExtension())

        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false
            return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                mentions=False)

        source = safe_unicode(source)
        try:
            if flavored:
                source = cls._flavored_markdown(source)
            return markdown.markdown(
                source, extensions, safe_mode=True, enable_attributes=False)
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def rst(cls, source, safe=True, mentions=False):
        """
        Render reStructuredText `source` to HTML.

        :param source: RST source
        :param safe: on a rendering error fall back to plain rendering
            instead of re-raising
        :param mentions: emphasise @mentions before rendering
        :returns: rendered HTML (document title followed by the body)
        """
        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false
            return cls.rst(mention_hl, safe=safe, mentions=False)

        source = safe_unicode(source)
        try:
            # Override only the disallowed directives with None. Plain
            # settings such as `input_encoding` must not be registered as
            # directives.
            for directive_name in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(directive_name, None)

            docutils_settings = dict(
                (alias, None)
                for alias in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES)
            docutils_settings.update({'input_encoding': 'unicode',
                                      'report_level': 4})

            parts = publish_parts(source=source,
                                  writer_name="html4css1",
                                  settings_overrides=docutils_settings)

            return parts['html_title'] + parts["fragment"]
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallbacking to render in plain mode')
                return cls.plain(source)
            else:
                raise
256 256
257 257
class RstTemplateRenderer(object):
    """
    Renders Mako templates looked up in the ``templates/rst_templates``
    directory located one level above this module's directory.
    """

    def __init__(self):
        package_dir = os.path.dirname(os.path.dirname(__file__))
        templates_dir = os.path.join(
            os.path.abspath(package_dir), 'templates', 'rst_templates')
        self.template_store = TemplateLookup(
            directories=[templates_dir],
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """Return the template named `templatename` from the lookup."""
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """Render `template_name`, passing `kwargs` to the template."""
        return self._get_template(template_name).render(**kwargs)
@@ -1,179 +1,179 b''
1 1 # -*- coding: utf-8 -*-
2 2
3 3 # Copyright (C) 2010-2017 RhodeCode GmbH
4 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU Affero General Public License, version 3
7 7 # (only), as published by the Free Software Foundation.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU Affero General Public License
15 15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16 #
17 17 # This program is dual-licensed. If you wish to learn more about the
18 18 # RhodeCode Enterprise Edition, including its added features, Support services,
19 19 # and proprietary license terms, please see https://rhodecode.com/licenses/
20 20
21 21 import pytest
22 22
23 23 from rhodecode.lib.markup_renderer import MarkupRenderer, RstTemplateRenderer
24 24
25 25
@pytest.mark.parametrize(
    "filename, expected_renderer",
    [
        ('readme.md', 'markdown'),
        ('readme.Md', 'markdown'),
        ('readme.MdoWn', 'markdown'),
        ('readme.rst', 'rst'),
        ('readme.Rst', 'rst'),
        ('readme.rest', 'rst'),
        # mixed-case variant; this entry used to duplicate the one above
        ('readme.Rest', 'rst'),

        ('markdown.xml', 'plain'),
        ('rest.xml', 'plain'),
        ('readme.xml', 'plain'),

        # extension-less readme files are plain text, not RST
        ('readme', 'plain'),
        ('README', 'plain'),
        ('readme.mdx', 'plain'),
        ('readme.rstx', 'plain'),
        ('readmex', 'plain'),
    ])
def test_detect_renderer(filename, expected_renderer):
    """The renderer detected for `filename` has the expected name."""
    detected_renderer = MarkupRenderer()._detect_renderer(
        '', filename=filename).__name__
    assert expected_renderer == detected_renderer
51 51
52 52
def test_markdown_xss_link():
    """A ``javascript:`` link target must not end up in the rendered href."""
    payload = "[link](javascript:alert('XSS: pwned!'))"
    html = MarkupRenderer.markdown(payload)
    assert 'href="javascript:alert(\'XSS: pwned!\')"' not in html
57 57
58 58
def test_markdown_xss_inline_html():
    """An inline <a> tag with a ``javascript:`` href must not be rendered."""
    payload = (
        '> <a name="n"\n'
        '> href="javascript:alert(\'XSS: pwned!\')">link</a>')
    html = MarkupRenderer.markdown(payload)
    assert 'href="javascript:alert(\'XSS: pwned!\')">' not in html
65 65
66 66
def test_markdown_inline_html():
    """Inline HTML tags are replaced with the [HTML_REMOVED] marker."""
    source_lines = [
        '> <a name="n"',
        '> href="https://rhodecode.com">link</a>',
    ]
    html = MarkupRenderer.markdown('\n'.join(source_lines))
    assert '[HTML_REMOVED]link[HTML_REMOVED]' in html
72 72
73 73
def test_rst_xss_link():
    """A ``javascript:`` link target must not end up in the RST output."""
    payload = "`Link<javascript:alert('XSS: pwned!')>`_"
    html = MarkupRenderer.rst(payload)
    assert "href=javascript:alert('XSS: pwned!')" not in html
78 78
79 79
@pytest.mark.xfail(reason='Bug in docutils. Waiting answer from the author')
def test_rst_xss_inline_html():
    """Inline HTML with a ``javascript:`` href must not survive rendering."""
    payload = '<a href="javascript:alert(\'XSS: pwned!\')">link</a>'
    html = MarkupRenderer.rst(payload)
    assert 'href="javascript:alert(' not in html
85 85
86 86
def test_rst_xss_raw_directive():
    """Content of a ``raw`` directive must not be emitted as HTML."""
    directive_lines = [
        '.. raw:: html',
        '',
        ' <a href="javascript:alert(\'XSS: pwned!\')">link</a>',
    ]
    html = MarkupRenderer.rst('\n'.join(directive_lines))
    assert 'href="javascript:alert(' not in html
94 94
95 95
96 96 def test_render_rst_template_without_files():
# Renders 'pull_request_update.mako' with all file lists empty and compares
# the output with the expected text verbatim.
# NOTE(review): leading whitespace inside the expected literal is not visible
# in this listing - confirm it against the template before editing.
97 97 expected = u'''\
98 98 Pull request updated. Auto status change to |under_review|
99 99
100 100 .. role:: added
101 101 .. role:: removed
102 102 .. parsed-literal::
103 103
104 104 Changed commits:
105 105 * :added:`2 added`
106 106 * :removed:`3 removed`
107 107
108 108 No file changes found
109 109
110 110 .. |under_review| replace:: *"NEW STATUS"*'''
111 111
# two added and three removed commits, matching the counts in `expected`
112 112 params = {
113 113 'under_review_label': 'NEW STATUS',
114 114 'added_commits': ['a', 'b'],
115 115 'removed_commits': ['a', 'b', 'c'],
116 116 'changed_files': [],
117 117 'added_files': [],
118 118 'modified_files': [],
119 119 'removed_files': [],
120 120 }
121 121 renderer = RstTemplateRenderer()
122 122 rendered = renderer.render('pull_request_update.mako', **params)
123 123 assert expected == rendered
124 124
125 125
126 126 def test_render_rst_template_with_files():
# Renders 'pull_request_update.mako' with added, modified and removed files,
# including non-ASCII paths, and compares the output with the expected text
# verbatim.
# NOTE(review): leading whitespace inside the expected literal is not visible
# in this listing - confirm it against the template before editing.
127 127 expected = u'''\
128 128 Pull request updated. Auto status change to |under_review|
129 129
130 130 .. role:: added
131 131 .. role:: removed
132 132 .. parsed-literal::
133 133
134 134 Changed commits:
135 135 * :added:`1 added`
136 136 * :removed:`3 removed`
137 137
138 138 Changed files:
139 139 * `A /path/a.py <#a_c--68ed34923b68>`_
140 140 * `A /path/b.js <#a_c--64f90608b607>`_
141 141 * `M /path/d.js <#a_c--85842bf30c6e>`_
142 142 * `M /path/ę.py <#a_c--d713adf009cd>`_
143 143 * R /path/ź.py
144 144
145 145 .. |under_review| replace:: *"NEW STATUS"*'''
146 146
147 147 added = ['/path/a.py', '/path/b.js']
148 148 modified = ['/path/d.js', u'/path/ę.py']
149 149 removed = [u'/path/ź.py']
150 150
# `changed_files` is the concatenation of the three lists above
151 151 params = {
152 152 'under_review_label': 'NEW STATUS',
153 153 'added_commits': ['a'],
154 154 'removed_commits': ['a', 'b', 'c'],
155 155 'changed_files': added + modified + removed,
156 156 'added_files': added,
157 157 'modified_files': modified,
158 158 'removed_files': removed,
159 159 }
160 160 renderer = RstTemplateRenderer()
161 161 rendered = renderer.render('pull_request_update.mako', **params)
162 162
163 163 assert expected == rendered
164 164
165 165
166 166 def test_render_rst_auto_status_template():
# Renders 'auto_status_change.mako' with only a status label set (pull request
# and commit id are None) and compares the output with the expected text
# verbatim.
167 167 expected = u'''\
168 168 Auto status change to |new_status|
169 169
170 170 .. |new_status| replace:: *"NEW STATUS"*'''
171 171
172 172 params = {
173 173 'new_status_label': 'NEW STATUS',
174 174 'pull_request': None,
175 175 'commit_id': None,
176 176 }
177 177 renderer = RstTemplateRenderer()
178 178 rendered = renderer.render('auto_status_change.mako', **params)
179 179 assert expected == rendered
General Comments 0
You need to be logged in to leave comments. Login now