##// END OF EJS Templates
issue-trackers: bleach.clean the url entry to avoid JS injections.
issue-trackers: bleach.clean the url entry to avoid JS injections.

File last commit:

r2440:86196e6b default
r2444:a18c6a2f default
Show More
markup_renderer.py
515 lines | 17.5 KiB | text/x-python | PythonLexer
project: added all source files and assets
r1 # -*- coding: utf-8 -*-
license: updated copyright year to 2017
r1271 # Copyright (C) 2011-2017 RhodeCode GmbH
project: added all source files and assets
r1 #
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/
"""
Renderer for markup languages with ability to parse using rst or markdown
"""
import re
import os
markup-rendering: added relative image support....
r1527 import lxml
project: added all source files and assets
r1 import logging
markup-rendering: added relative image support....
r1527 import urlparse
markdown: use bleach to cleanup html from markdown. This also enabled strict...
r2440 import bleach
readme/markup: improved order of generating readme files. Fixes #4050...
r396
project: added all source files and assets
r1 from mako.lookup import TemplateLookup
jupyter-rendering: added rendering of notebook into MarkupRenderer class.
r1491 from mako.template import Template as MakoTemplate
project: added all source files and assets
r1
from docutils.core import publish_parts
from docutils.parsers.rst import directives
security: use custom writer for RST rendering to prevent injection of javascript: tags.
r1833 from docutils import writers
from docutils.writers import html4css1
project: added all source files and assets
r1 import markdown
markup-rendering: added relative image support....
r1527 from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
from rhodecode.lib.utils2 import (
safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)
project: added all source files and assets
r1
log = logging.getLogger(__name__)
# default renderer used to generate automated comments
DEFAULT_COMMENTS_RENDERER = 'rst'
security: use custom writer for RST rendering to prevent injection of javascript: tags.
class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    Custom HTML Translator used for sandboxing potential
    JS injections in ref links
    """

    def visit_reference(self, node):
        """
        Neutralise ``javascript:`` targets before a reference is rendered.

        :param node: reference node being visited; when its ``refuri``
            attribute uses the ``javascript`` scheme it is overwritten with
            a harmless placeholder.
        :return: whatever the stock ``HTMLTranslator.visit_reference``
            returns for the (possibly rewritten) node.
        """
        if 'refuri' in node.attributes:
            refuri = node['refuri']
            if ':' in refuri:
                scheme = refuri.split(':', 1)[0]
                # URI schemes are case-insensitive, so `JavaScript:` must be
                # treated like `javascript:`. Whitespace and control
                # characters are dropped as well so they cannot be used to
                # disguise the scheme.
                scheme = re.sub(r'[\x00-\x20]+', '', scheme).lower()
                if scheme == 'javascript':
                    # we don't allow javascript type of refs...
                    node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # old style class requires this...
        return html4css1.HTMLTranslator.visit_reference(self, node)
class RhodeCodeWriter(html4css1.Writer):
    """
    html4css1 writer that renders documents with `CustomHTMLTranslator`
    instead of the stock translator.
    """

    def __init__(self):
        # the base writer is initialised explicitly, then the translator
        # class is replaced with the sandboxing one
        writers.Writer.__init__(self)
        self.translator_class = CustomHTMLTranslator
markup: make relative links pint to raw files for images and to standard files as links....
def relative_links(html_source, server_paths):
    """
    Rewrite the ``src``/``href`` attributes found in ``html_source`` using
    `relative_path`.

    :param html_source: HTML markup to process. It is returned untouched
        when it is empty, when lxml cannot be imported or when it cannot
        be parsed.
    :param server_paths: mapping with ``raw`` and ``standard`` keys holding
        the server paths that links are resolved against.
    :return: the serialized document with the rewritten attributes.
    """
    if not html_source:
        return html_source

    try:
        from lxml.html import fromstring
        from lxml.html import tostring
    except ImportError:
        log.exception('Failed to import lxml')
        return html_source

    try:
        tree = fromstring(html_source)
    except Exception:
        return html_source

    # images and videos are always resolved against the `raw` path
    for media in tree.cssselect('img, video'):
        location = media.attrib.get('src')
        if not location:
            continue
        media.attrib['src'] = relative_path(location, server_paths['raw'])

    # anchors without the `gfm` class use the `raw` path only when the link
    # ends with `?raw=1`, otherwise the `standard` one
    for anchor in tree.cssselect('a:not(.gfm)'):
        target = anchor.attrib.get('href')
        if not target:
            continue
        mode = 'raw' if target.endswith('?raw=1') else 'standard'
        anchor.attrib['href'] = relative_path(target, server_paths[mode])

    return tostring(tree)
def relative_path(path, request_path, is_repo_file=None):
    """
    Resolve a relative link against the current (non absolute) server path.

    e.g.

    path = '../logo.png'
    request_path= '/repo/files/path/file.md'
    produces: '/repo/files/logo.png'

    :param path: link to resolve; an empty value yields ``request_path``.
    :param request_path: server path the link is resolved against.
    :param is_repo_file: optional callable receiving ``request_path``; a
        truthy result drops the last segment of ``request_path`` before
        resolving. When not given every request path is treated that way.
    :return: ``path`` itself for data/javascript/anchor/invalid links, for
        links with a network location and when ``request_path`` is empty;
        otherwise the resolved path with a single leading ``/``.
    """
    # TODO(marcink): unicode/str support ?
    # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))

    # assume default is a valid file path
    file_check = is_repo_file or (lambda _request_path: True)

    if not path:
        return request_path

    path = safe_unicode(path)
    request_path = safe_unicode(request_path)

    # skip data, anchor, invalid links as well as absolute urls
    untouched_prefixes = (u'data:', u'javascript:', u'#', u':')
    if path.startswith(untouched_prefixes) or urlparse.urlparse(path).netloc:
        return path

    if not request_path:
        return path

    # drop one leading "/" first and one leading "./" afterwards
    if path.startswith(u'/'):
        path = path[1:]
    if path.startswith(u'./'):
        path = path[2:]

    segments = request_path.split('/')

    # compute how many trailing segments of the request path are dropped:
    # one when the request path is a VALID file, plus one per leading "../"
    levels_up = 1 if file_check(request_path) else 0
    while path.startswith(u'../'):
        levels_up += 1
        path = path[3:]

    if levels_up:
        segments = segments[:-levels_up]
    segments.append(path)

    return u'/' + u'/'.join(segments).lstrip(u'/')
project: added all source files and assets
class MarkupRenderer(object):
    """
    Renders markdown, rst, jupyter notebook and plain text sources to HTML.
    """
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    # shared Markdown objects, created once to speed up markdown rendering
    extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
    output_format = 'html4'
    markdown_renderer = markdown.Markdown(
        extensions, enable_attributes=False, output_format=output_format)

    markdown_renderer_flavored = markdown.Markdown(
        extensions + [GithubFlavoredMarkdownExtension()],
        enable_attributes=False, output_format=output_format)

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        runs detection of what renderer should be used for generating html
        from a markup language

        :param source: unused by the detection itself
        :param filename: name matched against the known filename patterns;
            a missing name falls back to the plain renderer
        :return: the matching renderer classmethod of `MarkupRenderer`
        """
        # `filename` is optional, a missing one simply matches no pattern
        # instead of making the regex calls fail on None
        filename = filename or ''

        if MarkupRenderer.MARKDOWN_PAT.search(filename):
            detected_renderer = 'markdown'
        elif MarkupRenderer.RST_PAT.search(filename):
            detected_renderer = 'rst'
        elif MarkupRenderer.JUPYTER_PAT.search(filename):
            detected_renderer = 'jupyter'
        else:
            # readme files without extension and everything unknown
            detected_renderer = 'plain'

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def bleach_clean(cls, text):
        """
        Sanitize ``text`` with bleach, keeping only the tags and attributes
        listed in the ``bleach_whitelist`` module.
        """
        from .bleach_whitelist import markdown_attrs, markdown_tags
        allowed_tags = markdown_tags
        allowed_attrs = markdown_attrs
        return bleach.clean(text, tags=allowed_tags, attributes=allowed_attrs)

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.
        Returns None when no renderer can be detected.

        :param filename: name whose ending is compared with the known
            markdown and rst extensions
        :param exclude: list/tuple of extensions that should be ignored;
            any other value disables the filtering
        """
        def _filter(elements):
            if isinstance(exclude, (list, tuple)):
                return [x for x in elements if x not in exclude]
            return elements

        if filename.endswith(
                tuple(_filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]]))):
            return 'markdown'
        if filename.endswith(
                tuple(_filter([x[0] for x in cls.RST_EXTS if x[0]]))):
            return 'rst'

        return None

    def render(self, source, filename=None):
        """
        Renders given source using the renderer detected from the filename.
        When nothing matches the plain renderer is used, which only
        replaces new lines with <br />

        :param source: markup to render
        :param filename: optional name used for the renderer detection
        """
        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown

        :param text:
        """

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            digest = md5_safe(matchobj.group(0))
            extractions[digest] = matchobj.group(0)
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if s.count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        # NOTE(review): without re.MULTILINE the `^` anchor only matches at
        # the very start of the text - confirm whether per-line matching
        # was intended here
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            return '\n\n' + extractions[matchobj.group(1)]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def urlify_text(cls, text):
        """
        Wrap every http(s) url matched by `URL_PAT` in an anchor tag.
        """
        def url_func(match_obj):
            url_full = match_obj.groups()[0]
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

        return cls.URL_PAT.sub(url_func, text)

    @classmethod
    def plain(cls, source, universal_newline=True):
        """
        Render ``source`` as plain text: urls become links and new lines
        are replaced with <br />

        :param source: text to render
        :param universal_newline: normalize all line endings to ``\\n``
            before the replacement
        """
        # NOTE(review): the text is not html-escaped here, presumably the
        # callers take care of that - verify
        source = safe_unicode(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        source = cls.urlify_text(source)
        return '<br />' + source.replace("\n", '<br />')

    @classmethod
    def _highlight_mentions(cls, source):
        """
        Wrap every @mention found in ``source`` in ``**`` markers.
        """
        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' **@%(uname)s** ' % {'uname': uname}

        return re.compile(MENTIONS_REGEX).sub(wrapp, source).strip()

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False,
                 clean_html=True):
        """
        returns markdown rendered code cleaned by the bleach library

        :param source: markdown text to render
        :param safe: on a rendering error fall back to the plain renderer
            instead of re-raising
        :param flavored: use the github flavored renderer
        :param mentions: highlight @mentions before rendering
        :param clean_html: pass the rendered html via `bleach_clean`
        """
        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false,
            # all other options have to be kept as requested by the caller
            return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                mentions=False, clean_html=clean_html)

        if flavored:
            markdown_renderer = cls.markdown_renderer_flavored
        else:
            markdown_renderer = cls.markdown_renderer

        source = safe_unicode(source)

        try:
            if flavored:
                source = cls._flavored_markdown(source)
            # the Markdown objects are shared between calls, reset them so
            # no state of a previously converted document is carried over
            markdown_renderer.reset()
            rendered = markdown_renderer.convert(source)
            if clean_html:
                rendered = cls.bleach_clean(rendered)
            return rendered
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def rst(cls, source, safe=True, mentions=False, clean_html=False):
        """
        returns rst rendered code, using `RhodeCodeWriter` as the writer

        :param source: rst text to render
        :param safe: on a rendering error fall back to the plain renderer
            instead of re-raising
        :param mentions: highlight @mentions before rendering
        :param clean_html: pass the rendered fragment via `bleach_clean`
        """
        if mentions:
            mention_hl = cls._highlight_mentions(source)
            # we extracted mentions render with this using Mentions false,
            # all other options have to be kept as requested by the caller
            return cls.rst(mention_hl, safe=safe, mentions=False,
                           clean_html=clean_html)

        source = safe_unicode(source)
        try:
            docutils_settings = dict(
                [(alias, None) for alias in
                 cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])

            docutils_settings.update({
                'input_encoding': 'unicode', 'report_level': 4})

            # NOTE(review): this also registers the two setting names above
            # as directives, not only the disallowed ones - confirm this is
            # intended
            for k, v in docutils_settings.items():
                directives.register_directive(k, v)

            parts = publish_parts(source=source,
                                  writer=RhodeCodeWriter(),
                                  settings_overrides=docutils_settings)
            rendered = parts["fragment"]
            if clean_html:
                rendered = cls.bleach_clean(rendered)
            return parts['html_title'] + rendered
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallbacking to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def jupyter(cls, source, safe=True):
        """
        returns a jupyter notebook rendered as html, together with the
        inlined css and the MathJax loading scripts

        :param source: notebook content, read with nbformat as version 4
        :param safe: replace `application/javascript` cell outputs with
            a placeholder text
        """
        from rhodecode.lib import helpers

        from traitlets.config import Config
        import nbformat
        from nbconvert import HTMLExporter
        from nbconvert.preprocessors import Preprocessor

        class CustomHTMLExporter(HTMLExporter):
            def _template_file_default(self):
                return 'basic'

        class Sandbox(Preprocessor):

            def preprocess(self, nb, resources):
                sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                for cell in nb['cells']:
                    if safe and 'outputs' in cell:
                        for cell_output in cell['outputs']:
                            if 'data' in cell_output:
                                if 'application/javascript' in cell_output['data']:
                                    cell_output['data']['text/plain'] = sandbox_text
                                    cell_output['data'].pop('application/javascript', None)
                return nb, resources

        def _sanitize_resources(resources):
            """
            Skip/sanitize some of the CSS generated and included in jupyter
            so it doesn't messes up UI so much
            """

            # TODO(marcink): probably we should replace this with whole custom
            # CSS set that doesn't screw up, but jupyter generated html has some
            # special markers, so it requires Custom HTML exporter template with
            # _default_template_path_default, to achieve that

            # strip the reset CSS
            if resources:
                marker_pos = resources[0].find('/*! Source')
                # find() gives -1 when the marker is missing, slicing from
                # it would drop everything but the last character
                if marker_pos != -1:
                    resources[0] = resources[0][marker_pos:]
            return resources

        def as_html(notebook):
            conf = Config()
            conf.CustomHTMLExporter.preprocessors = [Sandbox]
            html_exporter = CustomHTMLExporter(config=conf)

            (body, resources) = html_exporter.from_notebook_node(notebook)
            header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
            js = MakoTemplate(r'''
<!-- Load mathjax -->
<!-- MathJax configuration -->
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
TeX: {
extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
},
tex2jax: {
inlineMath: [ ['$','$'], ["\\(","\\)"] ],
displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
processEscapes: true,
processEnvironments: true
},
// Center justify equations in code and markdown cells. Elsewhere
// we use CSS to left justify single line equations in code cells.
displayAlign: 'center',
"HTML-CSS": {
styles: {'.MathJax_Display': {"margin": 0}},
linebreaks: { automatic: true },
availableFonts: ["STIX", "TeX"]
},
showMathMenu: false
});
</script>
<!-- End of mathjax configuration -->
<script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
''').render(h=helpers)

            css = '<style>{}</style>'.format(
                ''.join(_sanitize_resources(resources['inlining']['css'])))

            body = '\n'.join([header, css, js, body])
            return body, resources

        notebook = nbformat.reads(source, as_version=4)
        (body, resources) = as_html(notebook)
        return body
project: added all source files and assets
r1
class RstTemplateRenderer(object):
    """
    Renders the mako templates stored in the ``templates/rst_templates``
    directory located one level above the directory of this module.
    """

    def __init__(self):
        module_dir = os.path.dirname(__file__)
        package_root = os.path.abspath(os.path.dirname(module_dir))
        template_dir = os.path.join(package_root, 'templates', 'rst_templates')

        # templates get the helpers module available as `h`
        self.template_store = TemplateLookup(
            directories=[template_dir],
            input_encoding='utf-8',
            imports=['from rhodecode.lib import helpers as h'])

    def _get_template(self, templatename):
        """
        Look up ``templatename`` in the template store.
        """
        return self.template_store.get_template(templatename)

    def render(self, template_name, **kwargs):
        """
        Render the template called ``template_name``, passing ``kwargs``
        to it.
        """
        return self._get_template(template_name).render(**kwargs)