Show More
@@ -1,150 +1,195 | |||||
# -*- coding: utf-8 -*-
"""
    rhodecode.lib.markup_renderer
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


    Renderer for markup languages with ability to parse using rst or markdown

    :created_on: Oct 27, 2011
    :author: marcink
    :copyright: (C) 2011-2012 Marcin Kuzminski <marcin@python-works.com>
    :license: GPLv3, see COPYING for more details.
"""
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
26 |
|
26 | |||
27 | import re |
|
27 | import re | |
28 | import logging |
|
28 | import logging | |
29 | import traceback |
|
29 | import traceback | |
30 |
|
30 | |||
31 | from rhodecode.lib.utils2 import safe_unicode, MENTIONS_REGEX |
|
31 | from rhodecode.lib.utils2 import safe_unicode, MENTIONS_REGEX | |
32 |
|
32 | |||
33 | log = logging.getLogger(__name__) |
|
33 | log = logging.getLogger(__name__) | |
34 |
|
34 | |||
35 |
|
35 | |||
class MarkupRenderer(object):
    """
    Renders markup sources (markdown, reStructuredText or plain text) to HTML.

    The renderer is picked from a filename (or an explicit renderer name) by
    :meth:`_detect_renderer`; the individual renderers are also available
    directly as the classmethods :meth:`plain`, :meth:`markdown` and
    :meth:`rst`.
    """
    # rst directives that get disabled before any document is rendered
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    # NOTE: these patterns are unanchored, they match anywhere in the name
    MARKDOWN_PAT = re.compile(r'md|mkdn?|mdown|markdown', re.IGNORECASE)
    RST_PAT = re.compile(r're?st', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'readme', re.IGNORECASE)

    def _detect_renderer(self, source, filename=None):
        """
        runs detection of what renderer should be used for generating html
        from a markup language

        filename can be also explicitly a renderer name

        :param source: markup source, not inspected by the detection
        :param filename: file name or renderer name; ``None`` or an empty
            value selects the plain renderer
        :returns: the matching renderer classmethod of this class
        """
        # the regexes need a string; the default ``None`` used to raise
        # a TypeError here
        name = filename or ''

        if MarkupRenderer.MARKDOWN_PAT.search(name):
            detected_renderer = 'markdown'
        elif MarkupRenderer.RST_PAT.search(name):
            detected_renderer = 'rst'
        elif MarkupRenderer.PLAIN_PAT.search(name):
            # a bare README (no recognised extension) is rendered as rst
            detected_renderer = 'rst'
        else:
            detected_renderer = 'plain'

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown

        Pre-processes markdown source: ``<pre>`` blocks are protected from
        the other rewrites, underscores in words like ``foo_bar_baz`` get
        escaped and single newlines after a text line become markdown hard
        line breaks.

        :param text: markdown source
        :returns: the rewritten markdown source
        """
        from hashlib import md5

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            block = matchobj.group(0)
            # md5 needs bytes; text strings have to be encoded first
            if isinstance(block, bytes):
                raw = block
            else:
                raw = block.encode('utf-8')
            digest = md5(raw).hexdigest()
            extractions[digest] = block
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', '\\_')
            return s
        # NOTE(review): without re.MULTILINE the ``^`` only anchors at the
        # very start of ``text``, so only a word opening the whole text is
        # escaped. Left as is, because anchoring on every line would also
        # rewrite unindented fenced code - confirm the intended behaviour.
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # In very clear cases, let newlines become <br /> tags.
        def newline_callback(matchobj):
            if len(matchobj.group(1)) == 1:
                # markdown needs at least two trailing spaces for a hard
                # line break, a single one is ignored
                return matchobj.group(0).rstrip() + '  \n'
            else:
                return matchobj.group(0)
        pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
        text = re.sub(pattern, newline_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            digest = matchobj.group(1)
            if digest not in extractions:
                # placeholder-looking text written by the author, not one
                # of our extractions; keep it untouched
                return matchobj.group(0)
            return '\n\n' + extractions[digest]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    def render(self, source, filename=None):
        """
        Renders given source using a renderer detected from the filename
        (see :meth:`_detect_renderer`). When no markup renderer matches, the
        plain renderer is used, which just links urls and replaces new lines
        with <br />

        :param source: markup source to render
        :param filename: file name or explicit renderer name
        :returns: rendered html
        """

        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def plain(cls, source):
        """
        Renders plain text: urls are turned into links and new lines into
        <br /> tags.

        :param source: text to render
        :returns: html prefixed with a leading <br />
        """
        # NOTE(review): the text is not html-escaped before it is wrapped
        # in markup - confirm callers escape/sanitize untrusted input.
        source = safe_unicode(source)

        def urlify_text(text):
            # both halves are raw strings so that ``\(`` is not treated as
            # a (invalid) string escape
            url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                                 r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

            def url_func(match_obj):
                url_full = match_obj.groups()[0]
                return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

            return url_pat.sub(url_func, text)

        source = urlify_text(source)
        return '<br />' + source.replace("\n", '<br />')

    @classmethod
    def markdown(cls, source, safe=True, flavored=False):
        """
        Renders markdown source to html with the ``codehilite`` and
        ``extra`` extensions enabled.

        :param source: markdown source
        :param safe: when true, rendering errors are logged and the source
            is returned instead of raising
        :param flavored: pre-process the source with
            :meth:`_flavored_markdown` first
        :returns: rendered html; falls back to :meth:`plain` when the
            markdown package is not installed
        """
        source = safe_unicode(source)
        try:
            import markdown as markdown_lib
            if flavored:
                source = cls._flavored_markdown(source)
            # extensions are passed by keyword: newer Markdown releases do
            # not accept them positionally, and the resulting TypeError
            # would be swallowed by the handler below
            return markdown_lib.markdown(source,
                                         extensions=['codehilite', 'extra'])
        except ImportError:
            log.warning('Install markdown to use this function')
            return cls.plain(source)
        except Exception:
            log.error(traceback.format_exc())
            if safe:
                return source
            else:
                raise

    @classmethod
    def rst(cls, source, safe=True):
        """
        Renders reStructuredText source to html using docutils, with the
        directives from ``RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES`` disabled.

        :param source: rst source
        :param safe: when true, rendering errors are logged and the source
            is returned instead of raising
        :returns: html title + fragment; falls back to :meth:`plain` when
            docutils is not installed
        """
        source = safe_unicode(source)
        try:
            from docutils.core import publish_parts
            from docutils.parsers.rst import directives
            docutils_settings = dict([(alias, None) for alias in
                                cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])

            # disable only the disallowed directives; the real settings
            # added below must not get registered as directives
            for alias in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(alias, None)

            docutils_settings.update({'input_encoding': 'unicode',
                                      'report_level': 4})

            parts = publish_parts(source=source,
                                  writer_name="html4css1",
                                  settings_overrides=docutils_settings)

            return parts['html_title'] + parts["fragment"]
        except ImportError:
            log.warning('Install docutils to use this function')
            return cls.plain(source)
        except Exception:
            log.error(traceback.format_exc())
            if safe:
                return source
            else:
                raise

    @classmethod
    def rst_with_mentions(cls, source):
        """
        Renders rst source with every match of ``MENTIONS_REGEX`` emphasized
        as bold ``@username``.

        :param source: rst source
        :returns: rendered html, see :meth:`rst`
        """
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}
        mention_hl = mention_pat.sub(wrapp, source).strip()
        return cls.rst(mention_hl)
General Comments 0
You need to be logged in to leave comments.
Login now