##// END OF EJS Templates
i18n: updated translation for Polish...
i18n: updated translation for Polish Currently translated at 56.5% (614 of 1087 strings)

File last commit:

r8087:141066b8 default
r8092:7fef5132 default
Show More
markup_renderer.py
248 lines | 9.7 KiB | text/x-python | PythonLexer
Bradley M. Kuhn
Second step in two-part process to rename directories....
r4187 # -*- coding: utf-8 -*-
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
kallithea.lib.markup_renderer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Renderer for markup languages with ability to parse using rst or markdown
Bradley M. Kuhn
RhodeCode GmbH is not the sole author of this work
r4211 This file was forked by the Kallithea project in July 2014.
Original author and date, and relevant copyright and licensing information is below:
Bradley M. Kuhn
Second step in two-part process to rename directories....
r4187 :created_on: Oct 27, 2011
:author: marcink
Bradley M. Kuhn
RhodeCode GmbH is not the sole author of this work
r4211 :copyright: (c) 2013 RhodeCode GmbH, and others.
Bradley M. Kuhn
Correct licensing information in individual files....
r4208 :license: GPLv3, see LICENSE.md for more details.
Bradley M. Kuhn
Second step in two-part process to rename directories....
r4187 """
Mads Kiilerich
scripts: initial run of import cleanup using isort
r7718 import logging
Bradley M. Kuhn
Second step in two-part process to rename directories....
r4187 import re
import traceback
Mads Kiilerich
scripts: initial run of import cleanup using isort
r7718 import bleach
Mads Kiilerich
lib: refactor use of markdown library - it is a mandatory dependency
r7321 import markdown as markdown_mod
Mads Kiilerich
py3: rename all existing safe_unicode to safe_str
r8078 from kallithea.lib.utils2 import MENTIONS_REGEX, safe_str
Mads Kiilerich
scripts: initial run of import cleanup using isort
r7718
Bradley M. Kuhn
Second step in two-part process to rename directories....
r4187
log = logging.getLogger(__name__)


# Matches http:// and https:// URLs in running text. The optional path part
# must end in a character from a restricted set, so trailing punctuation such
# as "," or ")" is left out of the match.
url_re = re.compile(
    r'''\bhttps?://(?:[\da-zA-Z0-9@:.-]+)'''
    r'''(?:[/a-zA-Z0-9_=@#~&+%.,:;?!*()-]*[/a-zA-Z0-9_=@#~])?'''
)
Mads Kiilerich
helpers: tweak URL matching patterns - don't include trailing punctuation
r4691
Bradley M. Kuhn
Second step in two-part process to rename directories....
class MarkupRenderer(object):
    """
    Render README-style markup (Markdown, reStructuredText, plain text) as HTML.

    Everything is exposed as classmethods; the class only acts as a namespace.
    Only render() passes its result through bleach. The per-format methods
    (plain, markdown, rst) return HTML that has not been sanitized here.
    """

    # reST directives that get disabled (registered as None) before rendering.
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    # Unanchored, case-insensitive patterns that are searched for anywhere in
    # the file name (or renderer name) handed to _detect_renderer().
    MARKDOWN_PAT = re.compile(r'md|mkdn?|mdown|markdown', re.IGNORECASE)
    RST_PAT = re.compile(r're?st', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'readme', re.IGNORECASE)

    @classmethod
    def _detect_renderer(cls, source, filename):
        """
        Pick the method that should be used for generating HTML from markup.

        :param source: the markup text; currently not used for detection
        :param filename: file name, or explicitly a renderer name. None or
            an empty string selects plain rendering.
        :returns: one of the bound classmethods markdown, rst or plain
        """
        if not filename:
            # render() defaults filename to None, which the patterns can't
            # be matched against.
            return cls.plain
        if cls.MARKDOWN_PAT.search(filename):
            return cls.markdown
        # Names matching PLAIN_PAT ("readme") are rendered as reST as well.
        if cls.RST_PAT.search(filename) or cls.PLAIN_PAT.search(filename):
            return cls.rst
        return cls.plain

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Pre-process text in the style of GitHub flavored Markdown.

        <pre> blocks are protected from the other transformations, underscores
        in a leading multi-underscore word are escaped, and single newlines
        after a line starting with a word character or "<" become Markdown
        hard line breaks.

        :param text: Markdown source as str
        :returns: the transformed Markdown source
        """
        from hashlib import md5

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            block = matchobj.group(0)
            # md5 only accepts bytes; passing the str raised TypeError.
            digest = md5(block.encode('utf-8')).hexdigest()
            extractions[digest] = block
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        # NOTE: no re.MULTILINE here, so "^" only anchors at the very start of
        # the text and only a word that begins the text is affected.
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # In very clear cases, let newlines become <br /> tags.
        def newline_callback(matchobj):
            if len(matchobj.group(1)) == 1:
                # Two trailing spaces is what Markdown treats as a hard line
                # break; a single one would be ignored.
                return matchobj.group(0).rstrip() + '  \n'
            return matchobj.group(0)
        pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
        text = re.sub(pattern, newline_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            return '\n\n' + extractions[matchobj.group(1)]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def render(cls, source, filename=None):
        """
        Render source to sanitized HTML, using a renderer detected from filename.

        The renderer is chosen by _detect_renderer(); when nothing matches (or
        no filename is given) the text is rendered as plain text with newlines
        replaced by <br />. The result is always cleaned with bleach.

        :param source: markup text
        :param filename: file name, or explicitly a renderer name
        :returns: sanitized HTML

        >>> MarkupRenderer.render('''<img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg">''', '.md')
        '<p><img id="a" src="http://example.com/test.jpg" style="color: red;"></p>'
        >>> MarkupRenderer.render('''<img class="c d" src="file://localhost/test.jpg">''', 'b.mkd')
        '<p><img class="c d"></p>'
        >>> MarkupRenderer.render('''<a href="foo">foo</a>''', 'c.mkdn')
        '<p><a href="foo">foo</a></p>'
        >>> MarkupRenderer.render('''<script>alert(1)</script>''', 'd.mdown')
        '&lt;script&gt;alert(1)&lt;/script&gt;'
        >>> MarkupRenderer.render('''<div onclick="alert(2)">yo</div>''', 'markdown')
        '<div>yo</div>'
        >>> MarkupRenderer.render('''<a href="javascript:alert(3)">yo</a>''', 'md')
        '<p><a>yo</a></p>'
        """
        renderer = cls._detect_renderer(source, filename)
        readme_data = renderer(source)
        # Allow most HTML, while preventing XSS issues:
        # no <script> tags, no onclick attributes, no javascript
        # "protocol", and also limit styling to prevent defacing.
        return bleach.clean(readme_data,
            tags=['a', 'abbr', 'b', 'blockquote', 'br', 'code', 'dd',
                  'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5',
                  'h6', 'hr', 'i', 'img', 'li', 'ol', 'p', 'pre', 'span',
                  'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'th',
                  'thead', 'tr', 'ul'],
            attributes=['class', 'id', 'style', 'label', 'title', 'alt', 'href', 'src'],
            styles=['color'],
            protocols=['http', 'https', 'mailto'],
            )

    @classmethod
    def plain(cls, source, universal_newline=True):
        """
        Render plain text as HTML: URLs become links, newlines become <br />.

        No HTML escaping is done in this method, so the output should be
        sanitized before use (render() does that).

        :param source: text to render; passed through safe_str() first
        :param universal_newline: when true, all line endings recognized by
            str.splitlines() are normalized to a newline before rendering.
            A trailing line ending is dropped by that normalization.
        :returns: HTML, always starting with a <br />
        """
        source = safe_str(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        def url_func(match_obj):
            url_full = match_obj.group(0)
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
        source = url_re.sub(url_func, source)
        return '<br />' + source.replace("\n", '<br />')

    @classmethod
    def markdown(cls, source, safe=True, flavored=False):
        """
        Convert Markdown (possibly GitHub Flavored) to INSECURE HTML, possibly
        with "safe" fall-back to plaintext. Output from this method should be sanitized before use.

        :param source: Markdown text; passed through safe_str() first
        :param safe: when true, any exception during rendering is logged and
            the text is rendered with plain() instead; when false the
            exception is re-raised
        :param flavored: when true, apply _flavored_markdown() first
        :returns: unsanitized HTML

        >>> MarkupRenderer.markdown('''<img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg">''')
        '<p><img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg"></p>'
        >>> MarkupRenderer.markdown('''<img class="c d" src="file://localhost/test.jpg">''')
        '<p><img class="c d" src="file://localhost/test.jpg"></p>'
        >>> MarkupRenderer.markdown('''<a href="foo">foo</a>''')
        '<p><a href="foo">foo</a></p>'
        >>> MarkupRenderer.markdown('''<script>alert(1)</script>''')
        '<script>alert(1)</script>'
        >>> MarkupRenderer.markdown('''<div onclick="alert(2)">yo</div>''')
        '<div onclick="alert(2)">yo</div>'
        >>> MarkupRenderer.markdown('''<a href="javascript:alert(3)">yo</a>''')
        '<p><a href="javascript:alert(3)">yo</a></p>'
        >>> MarkupRenderer.markdown('''## Foo''')
        '<h2>Foo</h2>'
        >>> print(MarkupRenderer.markdown('''
        ...     #!/bin/bash
        ...     echo "hello"
        ... '''))
        <table class="code-highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre>1
        2</pre></div></td><td class="code"><div class="code-highlight"><pre><span></span><span class="ch">#!/bin/bash</span>
        <span class="nb">echo</span> <span class="s2">&quot;hello&quot;</span>
        </pre></div>
        </td></tr></table>
        """
        source = safe_str(source)
        try:
            if flavored:
                source = cls._flavored_markdown(source)
            return markdown_mod.markdown(
                source,
                extensions=['markdown.extensions.codehilite', 'markdown.extensions.extra'],
                extension_configs={'markdown.extensions.codehilite': {'css_class': 'code-highlight'}})
        except Exception:
            log.error(traceback.format_exc())
            if safe:
                log.debug('Falling back to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def rst(cls, source, safe=True):
        """
        Convert reStructuredText to HTML using docutils.

        The directives listed in RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES are
        disabled before rendering. If docutils can't be imported, the text is
        rendered with plain() instead.

        :param source: reST text; passed through safe_str() first
        :param safe: when true, any other exception during rendering is
            logged and the text is rendered with plain() instead; when false
            the exception is re-raised
        :returns: the docutils "html_title" part followed by the "fragment"
            part. This method does no sanitizing of its own.
        """
        source = safe_str(source)
        try:
            from docutils.core import publish_parts
            from docutils.parsers.rst import directives

            # Only the disallowed directives are registered (as None); the
            # real docutils settings below must not be registered as directives.
            for alias in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(alias, None)

            docutils_settings = {alias: None for alias in
                                 cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES}
            docutils_settings.update({'input_encoding': 'unicode',
                                      'report_level': 4})

            parts = publish_parts(source=source,
                                  writer_name="html4css1",
                                  settings_overrides=docutils_settings)

            return parts['html_title'] + parts["fragment"]
        except ImportError:
            log.warning('Install docutils to use this function')
            return cls.plain(source)
        except Exception:
            log.error(traceback.format_exc())
            if safe:
                log.debug('Falling back to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def rst_with_mentions(cls, source):
        """
        Render reST with @mentions emphasized.

        Every match of MENTIONS_REGEX is wrapped in reST strong markup (with
        escaped spaces around it) before the text is handed to rst().

        :param source: reST text
        :returns: HTML as returned by rst()
        """
        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return r'\ **@%(uname)s**\ ' % {'uname': uname}
        mention_hl = MENTIONS_REGEX.sub(wrapp, source).strip()
        return cls.rst(mention_hl)