rhodecode-enterprise-ce Files · rhodecode/lib/markup_renderer.py

dependencies: bring back supervisor to be able to run deamonized rhodecode without control.

marcink - - Load All Authors

File last commit:

r2090:f1192728 default


                r2378:8a80782a

default

Download file

             markup_renderer.py
        
                    499 lines
            
             | 17.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / lib / markup_renderer.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marcink
    
project: added all source files and assets

              r1
            
      # -*- coding: utf-8 -*-

        marcink
    
license: updated copyright year to 2017

              r1271
            
      # Copyright (C) 2011-2017 RhodeCode GmbH

        marcink
    
project: added all source files and assets

              r1
            
      #

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU Affero General Public License, version 3

      # (only), as published by the Free Software Foundation.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU Affero General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      #

      # This program is dual-licensed. If you wish to learn more about the

      # RhodeCode Enterprise Edition, including its added features, Support services,

      # and proprietary license terms, please see https://rhodecode.com/licenses/

      """

      Renderer for markup languages with ability to parse using rst or markdown

      """

      import re

      import os

        marcink
    
markup-rendering: added relative image support....

              r1527
            
      import lxml

        marcink
    
project: added all source files and assets

              r1
            
      import logging

        marcink
    
markup-rendering: added relative image support....

              r1527
            
      import urlparse

        marcink
    
readme/markup: improved order of generating readme files. Fixes #4050...

              r396
            
        marcink
    
project: added all source files and assets

              r1
            
      from mako.lookup import TemplateLookup

        marcink
    
jupyter-rendering: added rendering of notebook into MarkupRenderer class.

              r1491
            
      from mako.template import Template as MakoTemplate

        marcink
    
project: added all source files and assets

              r1
            
      from docutils.core import publish_parts

      from docutils.parsers.rst import directives

        marcink
    
security: use custom writer for RST rendering to prevent injection of javascript: tags.

              r1833
            
      from docutils import writers

      from docutils.writers import html4css1

        marcink
    
project: added all source files and assets

              r1
            
      import markdown

        marcink
    
markup-rendering: added relative image support....

              r1527
            
      from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension

      from rhodecode.lib.utils2 import (

          safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)

        marcink
    
project: added all source files and assets

              r1
            
      log = logging.getLogger(__name__)

      # default renderer used to generate automated comments

      DEFAULT_COMMENTS_RENDERER = 'rst'

        marcink
    
security: use custom writer for RST rendering to prevent injection of javascript: tags.

              r1833
            
      class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
          """
          Custom HTML Translator used for sandboxing potential
          JS injections in ref links.

          A reference whose URI scheme is ``javascript`` gets its target replaced
          by a harmless placeholder before the stock html4css1 translator
          renders it.
          """

          def visit_reference(self, node):
              """
              Neutralize ``javascript:`` targets, then delegate to the base class.

              :param node: docutils reference node being visited
              """
              if 'refuri' in node.attributes:
                  refuri = node['refuri']
                  if ':' in refuri:
                      scheme = refuri.split(':', 1)[0]
                      # Compare case-insensitively and drop any whitespace found
                      # in the scheme part, so spellings such as `JavaScript:`
                      # or a scheme broken up by tabs/newlines are caught too.
                      scheme = ''.join(scheme.split()).lower()
                      if scheme == 'javascript':
                          # we don't allow javascript type of refs...
                          node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

              # old style class requires this...
              return html4css1.HTMLTranslator.visit_reference(self, node)

      class RhodeCodeWriter(writers.html4css1.Writer):
          """
          html4css1 based writer that renders through ``CustomHTMLTranslator``.
          """

          def __init__(self):

              # only the generic docutils Writer initializer is invoked here; the
              # translator class is then set explicitly on the instance
              writers.Writer.__init__(self)

              self.translator_class = CustomHTMLTranslator

        marcink
    
markup: make relative links pint to raw files for images and to standard files as links....

              r2003
            
      def relative_links(html_source, server_paths):
          """
          Rewrite ``src``/``href`` attributes of rendered HTML via ``relative_path``.

          ``img`` and ``video`` sources are resolved against ``server_paths['raw']``.
          Anchors (except those with the ``gfm`` class) are resolved against
          ``server_paths['raw']`` when their href ends with ``?raw=1`` and against
          ``server_paths['standard']`` otherwise.

          :param html_source: rendered HTML; returned unchanged when it is empty,
              when lxml cannot be imported or when it cannot be parsed
          :param server_paths: mapping providing the ``'raw'`` and ``'standard'``
              request paths
          :return: the document serialized with ``lxml.html.tostring``
          """
          if not html_source:
              return html_source

          try:
              from lxml.html import fromstring
              from lxml.html import tostring
          except ImportError:
              log.exception('Failed to import lxml')
              return html_source

          try:
              # use the names imported above instead of relying on the
              # module-level ``lxml`` having its ``html`` submodule loaded
              doc = fromstring(html_source)
          except Exception:
              # best effort: markup lxml cannot parse is returned untouched,
              # but leave a trace of the reason
              log.debug('Failed to parse html for relative links', exc_info=True)
              return html_source

          for el in doc.cssselect('img, video'):
              src = el.attrib.get('src')
              if src:
                  el.attrib['src'] = relative_path(src, server_paths['raw'])

          for el in doc.cssselect('a:not(.gfm)'):
              href = el.attrib.get('href')
              if href:
                  path_key = 'raw' if href.endswith('?raw=1') else 'standard'
                  el.attrib['href'] = relative_path(href, server_paths[path_key])

          return tostring(doc)

      def relative_path(path, request_path, is_repo_file=None):
          """
          relative link support, path is a rel path, and request_path is current
          server path (not absolute)

          e.g.

          path = '../logo.png'
          request_path= '/repo/files/path/file.md'
          produces: '/repo/files/logo.png'

          :param path: link target found in the rendered markup
          :param request_path: server path of the document being rendered;
              returned as is when ``path`` is empty
          :param is_repo_file: optional callable receiving ``request_path`` and
              telling whether it is a file; defaults to a check that always
              answers ``True``
          :return: ``path`` untouched for data/javascript/anchor links and for
              any link carrying a URL scheme or network location, otherwise a
              server path starting with ``/``
          """
          # TODO(marcink): unicode/str support ?
          # maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))
          def dummy_check(p):
              return True  # assume default is a valid file path

          is_repo_file = is_repo_file or dummy_check
          if not path:
              return request_path

          path = safe_unicode(path)
          request_path = safe_unicode(request_path)

          if path.startswith((u'data:', u'javascript:', u'#', u':')):
              # skip data, anchor, invalid links
              return path

          parsed = urlparse.urlparse(path)
          # links with a scheme but no netloc (mailto:, tel:, ...) are not
          # repository paths either and must not be rewritten
          is_absolute = bool(parsed.netloc or parsed.scheme)
          if is_absolute:
              return path
          if not request_path:
              return path

          if path.startswith(u'/'):
              path = path[1:]

          if path.startswith(u'./'):
              path = path[2:]

          parts = request_path.split('/')
          # compute how deep we need to traverse the request_path
          depth = 0

          if is_repo_file(request_path):
              # if request path is a VALID file, we use a relative path with
              # one level up
              depth += 1

          while path.startswith(u'../'):
              depth += 1
              path = path[3:]

          if depth > 0:
              parts = parts[:-depth]

          parts.append(path)
          final_path = u'/'.join(parts).lstrip(u'/')

          return u'/' + final_path

        marcink
    
project: added all source files and assets

              r1
            
      class MarkupRenderer(object):
          """
          Renders markup sources (markdown, rst, jupyter notebooks, plain text)
          into HTML; the renderer is selected from the file name.
          """

          # rst directives registered as ``None`` before rendering (see ``rst``)
          RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

          # case-insensitive file name suffix patterns used by _detect_renderer
          MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)

          RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)

        marcink
    
jupyter-rendering: added rendering of notebook into MarkupRenderer class.

              r1491
            
          JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)

        marcink
    
project: added all source files and assets

              r1
            
          PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

        marcink
    
markup: use cached version of http pattern for urlify_text. This...

              r2090
            
          URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'

                               r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

        marcink
    
markup-renderer: use global Markdown object to speed up markdown rendering.

              r1353
            
          # Markdown extensions enabled for both renderer instances below
          extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']

          # class-level Markdown instances, created once at class definition
          # and reused by the ``markdown`` method
          markdown_renderer = markdown.Markdown(

              extensions, safe_mode=True, enable_attributes=False)

          # same configuration plus the GithubFlavoredMarkdownExtension
          markdown_renderer_flavored = markdown.Markdown(

              extensions + [GithubFlavoredMarkdownExtension()], safe_mode=True,

              enable_attributes=False)

        marcink
    
readme/markup: improved order of generating readme files. Fixes #4050...

              r396
            
          # extension together with weights. Lower is first means we control how

          # extensions are attached to readme names with those.

          PLAIN_EXTS = [

        johbo
    
renderer: Clean up obsolete code...

              r773
            
              # prefer no extension

        marcink
    
readme/markup: improved order of generating readme files. Fixes #4050...

              r396
            
              ('', 0),  # special case that renders READMES names without extension

              ('.text', 2), ('.TEXT', 2),

              ('.txt', 3), ('.TXT', 3)

          ]

          RST_EXTS = [

              ('.rst', 1), ('.rest', 1),

              ('.RST', 2), ('.REST', 2)

          ]

          MARKDOWN_EXTS = [

              ('.md', 1), ('.MD', 1),

              ('.mkdn', 2), ('.MKDN', 2),

              ('.mdown', 3), ('.MDOWN', 3),

              ('.markdown', 4), ('.MARKDOWN', 4)

          ]

        marcink
    
project: added all source files and assets

              r1
            
          def _detect_renderer(self, source, filename=None):
              """
              runs detection of what renderer should be used for generating html
              from a markup language

              The file name is matched against the markdown, rst and jupyter
              patterns in that order; anything else, including a missing file
              name, is rendered as plain text.

              :param source: markup source; not inspected by the detection
              :param filename: name of the file being rendered, may be None
              :return: the matching renderer method of ``MarkupRenderer``
              """
              detectors = (
                  (MarkupRenderer.MARKDOWN_PAT, 'markdown'),
                  (MarkupRenderer.RST_PAT, 'rst'),
                  (MarkupRenderer.JUPYTER_PAT, 'jupyter'),
              )

              detected_renderer = 'plain'
              # filename defaults to None, which the regex API would reject
              # with a TypeError, so only run the patterns on a real name
              if filename:
                  for pattern, renderer_name in detectors:
                      if pattern.search(filename):
                          detected_renderer = renderer_name
                          break

              return getattr(MarkupRenderer, detected_renderer)

        marcink
    
readme/markup: improved order of generating readme files. Fixes #4050...

              r396
            
          @classmethod
          def renderer_from_filename(cls, filename, exclude):
              """
              Detect renderer markdown/rst from filename and optionally use exclude
              list to remove some options. This is mostly used in helpers.

              :param filename: file name checked against the known extensions
              :param exclude: list/tuple of extensions to ignore; any other
                  value disables the filtering
              :return: ``'markdown'``, ``'rst'`` or None when no renderer can
                  be detected
              """
              skipped = exclude if isinstance(exclude, (list, tuple)) else ()

              # markdown extensions take precedence over rst ones
              candidates = (
                  ('markdown', cls.MARKDOWN_EXTS),
                  ('rst', cls.RST_EXTS),
              )
              for renderer_name, weighted_exts in candidates:
                  suffixes = tuple(
                      ext for ext, _weight in weighted_exts
                      if ext and ext not in skipped)
                  if filename.endswith(suffixes):
                      return renderer_name

              return None

        marcink
    
readme/markup: improved order of generating readme files. Fixes #4050...

              r396
            
        marcink
    
project: added all source files and assets

              r1
            
          def render(self, source, filename=None):
              """
              Renders given source using the renderer detected for filename.

              The renderer is looked up with ``_detect_renderer`` and then
              called with ``source`` as its only argument.

              :param source: markup text to render
              :param filename: file name used for the renderer detection
              :return: whatever the selected renderer returns for ``source``
              """
              render_func = self._detect_renderer(source, filename)
              return render_func(source)

          @classmethod
          def _flavored_markdown(cls, text):
              """
              Github style flavored markdown pre-processing.

              ``<pre>`` blocks are swapped for placeholders, underscores of a
              leading ``foo_bar_baz`` style word are escaped, and the ``<pre>``
              blocks are put back, each preceded by two newlines.

              :param text: markdown source
              :return: the pre-processed source
              """
              # Extract pre blocks.
              stashed_blocks = {}

              def _stash_pre(match):
                  block = match.group(0)
                  # assumes md5_safe yields the 32 hex chars that the restore
                  # pattern below looks for -- TODO confirm
                  key = md5_safe(block)
                  stashed_blocks[key] = block
                  return "{gfm-extraction-%s}" % key

              pre_block_re = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
              text = pre_block_re.sub(_stash_pre, text)

              # Prevent foo_bar_baz from ending up with an italic word in the middle.
              def _escape_underscores(match):
                  word = match.group(0)
                  if word.count('_') < 2:
                      return word
                  return word.replace('_', r'\_')

              # NOTE: no re.MULTILINE flag is used, so `^` anchors only at the
              # very beginning of the text
              text = re.sub(
                  r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', _escape_underscores, text)

              # Insert pre block extractions.
              def _restore_pre(match):
                  return '\n\n' + stashed_blocks[match.group(1)]

              return re.sub(
                  r'\{gfm-extraction-([0-9a-f]{32})\}', _restore_pre, text)

          @classmethod
          def urlify_text(cls, text):
              """
              Wrap every match of ``cls.URL_PAT`` found in text with an anchor tag.

              The matched URL is used verbatim both as the ``href`` value and
              as the link label.

              :param text: text to search for urls
              :return: text with the urls turned into ``<a>`` elements
              """
              def _as_anchor(match):
                  return '<a href="%(url)s">%(url)s</a>' % {'url': match.group(1)}

              return cls.URL_PAT.sub(_as_anchor, text)

        marcink
    
project: added all source files and assets

              r1
            
          @classmethod
          def plain(cls, source, universal_newline=True):
              """
              Render source as plain text: urls become links and newlines
              become ``<br />`` tags.

              :param source: text to render
              :param universal_newline: when true, all line endings recognized
                  by ``splitlines`` are normalized to ``\\n`` first
              :return: html string, always prefixed with a ``<br />``
              """
              text = safe_unicode(source)
              if universal_newline:
                  text = '\n'.join(text.splitlines())

              linked = cls.urlify_text(text)
              return '<br />' + linked.replace("\n", '<br />')

          @classmethod
          def markdown(cls, source, safe=True, flavored=True, mentions=False):
              """
              Render markdown source into html.

              :param source: markdown text
              :param safe: when true a rendering error is logged and the source
                  is rendered with ``plain`` instead; otherwise the error is
                  re-raised
              :param flavored: use the github flavored renderer together with
                  the ``_flavored_markdown`` pre-processing
              :param mentions: wrap matches of ``MENTIONS_REGEX`` into
                  ``**@name**`` before rendering
              :return: rendered html
              """
              # It does not allow to insert inline HTML. In presence of HTML tags, it
              # will replace them instead with [HTML_REMOVED]. This is controlled by
              # the safe_mode=True parameter of the markdown method.
              if flavored:
                  markdown_renderer = cls.markdown_renderer_flavored
              else:
                  markdown_renderer = cls.markdown_renderer

              if mentions:
                  mention_pat = re.compile(MENTIONS_REGEX)

                  def wrapp(match_obj):
                      uname = match_obj.groups()[0]
                      return ' **@%(uname)s** ' % {'uname': uname}

                  mention_hl = mention_pat.sub(wrapp, source).strip()
                  # we extracted mentions render with this using Mentions false
                  return cls.markdown(mention_hl, safe=safe, flavored=flavored,
                                      mentions=False)

              source = safe_unicode(source)
              try:
                  if flavored:
                      source = cls._flavored_markdown(source)
                  # the Markdown instances are shared class attributes; reset
                  # them so state collected while converting a previous
                  # document cannot leak into this one
                  markdown_renderer.reset()
                  return markdown_renderer.convert(source)
              except Exception:
                  log.exception('Error when rendering Markdown')
                  if safe:
                      log.debug('Fallback to render in plain mode')
                      return cls.plain(source)
                  else:
                      raise

          @classmethod
          def rst(cls, source, safe=True, mentions=False):
              """
              Render reStructuredText source into html.

              :param source: rst text
              :param safe: when true a rendering error is logged and the source
                  is rendered with ``plain`` instead; otherwise the error is
                  re-raised
              :param mentions: wrap matches of ``MENTIONS_REGEX`` into
                  ``**@name**`` before rendering
              :return: ``html_title`` and ``fragment`` parts joined together
              """
              if mentions:
                  mention_pat = re.compile(MENTIONS_REGEX)

                  def wrapp(match_obj):
                      uname = match_obj.groups()[0]
                      return ' **@%(uname)s** ' % {'uname': uname}

                  mention_hl = mention_pat.sub(wrapp, source).strip()
                  # we extracted mentions render with this using Mentions false
                  return cls.rst(mention_hl, safe=safe, mentions=False)

              source = safe_unicode(source)
              try:
                  # Only the disallowed directive names are overridden with None
                  # in the docutils registry. Settings keys are kept apart so
                  # they do not end up registered as bogus directives.
                  for alias in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                      directives.register_directive(alias, None)

                  docutils_settings = {'input_encoding': 'unicode',
                                       'report_level': 4}

                  parts = publish_parts(source=source,
                                        writer=RhodeCodeWriter(),
                                        settings_overrides=docutils_settings)

                  return parts['html_title'] + parts["fragment"]
              except Exception:
                  log.exception('Error when rendering RST')
                  if safe:
                      log.debug('Fallbacking to render in plain mode')
                      return cls.plain(source)
                  else:
                      raise

        marcink
    
jupyter-rendering: added rendering of notebook into MarkupRenderer class.

              r1491
            
          @classmethod
          def jupyter(cls, source, safe=True):
              """
              Render a jupyter notebook source into html using nbconvert.

              :param source: notebook content, read with ``nbformat.reads`` as
                  version 4
              :param safe: when true, ``application/javascript`` cell outputs
                  are replaced by a plain text placeholder
              :return: html made of a marker comment, the inlined css, the
                  MathJax setup and the exported notebook body
              """
              from rhodecode.lib import helpers
              from traitlets.config import Config
              import nbformat
              from nbconvert import HTMLExporter
              from nbconvert.preprocessors import Preprocessor

              class CustomHTMLExporter(HTMLExporter):
                  def _template_file_default(self):
                      return 'basic'

              class Sandbox(Preprocessor):

                  def preprocess(self, nb, resources):
                      sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                      for cell in nb['cells']:
                          if safe and 'outputs' in cell:
                              for cell_output in cell['outputs']:
                                  if 'data' in cell_output:
                                      if 'application/javascript' in cell_output['data']:
                                          cell_output['data']['text/plain'] = sandbox_text
                                          cell_output['data'].pop('application/javascript', None)
                      return nb, resources

              def _sanitize_resources(resources):
                  """
                  Skip/sanitize some of the CSS generated and included in jupyter
                  so it doesn't mess up the UI so much
                  """

                  # TODO(marcink): probably we should replace this with whole custom
                  # CSS set that doesn't screw up, but jupyter generated html has some
                  # special markers, so it requires Custom HTML exporter template with
                  # _default_template_path_default, to achieve that

                  # strip the reset CSS
                  if resources:
                      marker_pos = resources[0].find('/*! Source')
                      # find() returns -1 when the marker is missing; slicing
                      # from -1 would keep only the last character of the css
                      if marker_pos > 0:
                          resources[0] = resources[0][marker_pos:]
                  return resources

              def as_html(notebook):
                  conf = Config()
                  conf.CustomHTMLExporter.preprocessors = [Sandbox]
                  html_exporter = CustomHTMLExporter(config=conf)

                  (body, resources) = html_exporter.from_notebook_node(notebook)
                  header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
                  js = MakoTemplate(r'''
                  <!-- Load mathjax -->
                      <!-- MathJax configuration -->
                      <script type="text/x-mathjax-config">
                      MathJax.Hub.Config({
                          jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                          extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                          TeX: {
                              extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                          },
                          tex2jax: {
                              inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                              displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                              processEscapes: true,
                              processEnvironments: true
                          },
                          // Center justify equations in code and markdown cells. Elsewhere
                          // we use CSS to left justify single line equations in code cells.
                          displayAlign: 'center',
                          "HTML-CSS": {
                              styles: {'.MathJax_Display': {"margin": 0}},
                              linebreaks: { automatic: true },
                              availableFonts: ["STIX", "TeX"]
                          },
                          showMathMenu: false
                      });
                      </script>
                      <!-- End of mathjax configuration -->
                      <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
                  ''').render(h=helpers)

                  css = '<style>{}</style>'.format(
                      ''.join(_sanitize_resources(resources['inlining']['css'])))

                  body = '\n'.join([header, css, js, body])
                  return body, resources

              notebook = nbformat.reads(source, as_version=4)
              (body, resources) = as_html(notebook)
              return body

        marcink
    
project: added all source files and assets

              r1
            
      class RstTemplateRenderer(object):
          """
          Renders templates looked up in the ``templates/rst_templates``
          directory located one level above the directory of this module.
          """

          def __init__(self):
              module_dir = os.path.dirname(__file__)
              package_root = os.path.abspath(os.path.dirname(module_dir))
              templates_dir = os.path.join(
                  package_root, 'templates', 'rst_templates')

              # templates get the helpers module available as ``h``
              self.template_store = TemplateLookup(
                  directories=[templates_dir],
                  input_encoding='utf-8',
                  imports=['from rhodecode.lib import helpers as h'])

          def _get_template(self, templatename):
              """Fetch ``templatename`` from the template store."""
              return self.template_store.get_template(templatename)

          def render(self, template_name, **kwargs):
              """
              Render the template called ``template_name``, passing ``kwargs``
              through to its ``render`` call.
              """
              return self._get_template(template_name).render(**kwargs)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

marcink project: added all source files and assets	r1	# -- coding: utf-8 --

marcink license: updated copyright year to 2017	r1271	# Copyright (C) 2011-2017 RhodeCode GmbH
marcink project: added all source files and assets	r1	#
		# This program is free software: you can redistribute it and/or modify
		# it under the terms of the GNU Affero General Public License, version 3
		# (only), as published by the Free Software Foundation.
		#
		# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
		#
		# You should have received a copy of the GNU Affero General Public License
		# along with this program. If not, see <http://www.gnu.org/licenses/>.
		#
		# This program is dual-licensed. If you wish to learn more about the
		# RhodeCode Enterprise Edition, including its added features, Support services,
		# and proprietary license terms, please see https://rhodecode.com/licenses/


		"""
		Renderer for markup languages with ability to parse using rst or markdown
		"""

		import re
		import os
marcink markup-rendering: added relative image support....	r1527	import lxml
marcink project: added all source files and assets	r1	import logging
marcink markup-rendering: added relative image support....	r1527	import urlparse
marcink readme/markup: improved order of generating readme files. Fixes #4050...	r396
marcink project: added all source files and assets	r1	from mako.lookup import TemplateLookup
marcink jupyter-rendering: added rendering of notebook into MarkupRenderer class.	r1491	from mako.template import Template as MakoTemplate
marcink project: added all source files and assets	r1
		from docutils.core import publish_parts
		from docutils.parsers.rst import directives
marcink security: use custom writer for RST rendering to prevent injection of javascript: tags.	r1833	from docutils import writers
		from docutils.writers import html4css1
marcink project: added all source files and assets	r1	import markdown

marcink markup-rendering: added relative image support....	r1527	from rhodecode.lib.markdown_ext import GithubFlavoredMarkdownExtension
		from rhodecode.lib.utils2 import (
		safe_str, safe_unicode, md5_safe, MENTIONS_REGEX)
marcink project: added all source files and assets	r1
# module-level logger, named after this module
log = logging.getLogger(__name__)

# default renderer used to generate automated comments
DEFAULT_COMMENTS_RENDERER = 'rst'


class CustomHTMLTranslator(writers.html4css1.HTMLTranslator):
    """
    Custom HTML Translator used for sandboxing potential
    JS injections in ref links
    """

    def visit_reference(self, node):
        """
        Neutralise ``javascript:`` reference targets, then delegate to the
        stock html4css1 translator.

        :param node: docutils reference node; its ``refuri`` attribute is
            rewritten in place when it uses the javascript scheme
        """
        if 'refuri' in node.attributes:
            refuri = node['refuri']
            if ':' in refuri:
                prefix, _link = refuri.split(':', 1)
                # URL schemes are case-insensitive and browsers drop
                # whitespace/control characters around and inside them, so
                # normalise before comparing; otherwise `JavaScript:` or
                # `java\tscript:` would slip through the check
                scheme = ''.join(ch for ch in prefix if ch > ' ').lower()
                if scheme == 'javascript':
                    # we don't allow javascript type of refs...
                    node['refuri'] = 'javascript:alert("SandBoxedJavascript")'

        # old style class requires this (explicit base-class call, no super)
        return html4css1.HTMLTranslator.visit_reference(self, node)


		class RhodeCodeWriter(writers.html4css1.Writer):
		def __init__(self):
		writers.Writer.__init__(self)
		self.translator_class = CustomHTMLTranslator


def relative_links(html_source, server_paths):
    """
    Rewrite relative ``src``/``href`` attributes found in ``html_source``.

    ``img`` and ``video`` sources, as well as anchors whose href ends with
    ``?raw=1``, are resolved against ``server_paths['raw']``; every other
    anchor (except those with the ``gfm`` class) is resolved against
    ``server_paths['standard']``.

    :param html_source: html markup to process; falsy values are returned
        unchanged
    :param server_paths: mapping with ``raw`` and ``standard`` request paths
    :return: serialized html produced by lxml, or the untouched
        ``html_source`` when lxml is unavailable or parsing fails
    """
    if not html_source:
        return html_source

    try:
        from lxml.html import fromstring
        from lxml.html import tostring
    except ImportError:
        log.exception('Failed to import lxml')
        return html_source

    try:
        # use the names imported above instead of relying on `lxml.html`
        # being reachable as an attribute of the top-level package
        doc = fromstring(html_source)
    except Exception:
        # best effort: unparsable markup is handed back as-is, but leave
        # a trace so the failure is not completely silent
        log.debug('Failed to parse html source for relative links',
                  exc_info=True)
        return html_source

    for el in doc.cssselect('img, video'):
        src = el.attrib.get('src')
        if src:
            el.attrib['src'] = relative_path(src, server_paths['raw'])

    for el in doc.cssselect('a:not(.gfm)'):
        href = el.attrib.get('href')
        if href:
            # links explicitly asking for raw content point at raw files
            mode = 'raw' if href.endswith('?raw=1') else 'standard'
            el.attrib['href'] = relative_path(href, server_paths[mode])

    return tostring(doc)


		def relative_path(path, request_path, is_repo_file=None):
		"""
		relative link support, path is a rel path, and request_path is current
		server path (not absolute)

		e.g.

		path = '../logo.png'
		request_path= '/repo/files/path/file.md'
		produces: '/repo/files/logo.png'
		"""
		# TODO(marcink): unicode/str support ?
		# maybe=> safe_unicode(urllib.quote(safe_str(final_path), '/:'))

		def dummy_check(p):
		return True # assume default is a valid file path

		is_repo_file = is_repo_file or dummy_check
		if not path:
		return request_path

		path = safe_unicode(path)
		request_path = safe_unicode(request_path)

marcink makrup-renderer: fix some cases which could cause lxml errors, skip js flags	r1529	if path.startswith((u'data:', u'javascript:', u'#', u':')):
marcink markup-rendering: added relative image support....	r1527	# skip data, anchor, invalid links
		return path

		is_absolute = bool(urlparse.urlparse(path).netloc)
		if is_absolute:
		return path

		if not request_path:
		return path

		if path.startswith(u'/'):
		path = path[1:]

		if path.startswith(u'./'):
		path = path[2:]

		parts = request_path.split('/')
		# compute how deep we need to traverse the request_path
		depth = 0

		if is_repo_file(request_path):
		# if request path is a VALID file, we use a relative path with
		# one level up
		depth += 1

		while path.startswith(u'../'):
		depth += 1
		path = path[3:]

		if depth > 0:
		parts = parts[:-depth]

		parts.append(path)
		final_path = u'/'.join(parts).lstrip(u'/')

		return u'/' + final_path


class MarkupRenderer(object):
    """
    Renders markup sources (Markdown, reStructuredText, Jupyter notebooks
    and plain text) to HTML, picking the renderer from the file name.
    """
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

    MARKDOWN_PAT = re.compile(r'\.(md|mkdn?|mdown|markdown)$', re.IGNORECASE)
    RST_PAT = re.compile(r'\.re?st$', re.IGNORECASE)
    JUPYTER_PAT = re.compile(r'\.(ipynb)$', re.IGNORECASE)
    PLAIN_PAT = re.compile(r'^readme$', re.IGNORECASE)

    # compiled once at class level, used by urlify_text
    URL_PAT = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

    # shared Markdown objects, created once to speed up markdown rendering
    extensions = ['codehilite', 'extra', 'def_list', 'sane_lists']
    markdown_renderer = markdown.Markdown(
        extensions, safe_mode=True, enable_attributes=False)

    markdown_renderer_flavored = markdown.Markdown(
        extensions + [GithubFlavoredMarkdownExtension()], safe_mode=True,
        enable_attributes=False)

    # extension together with weights. Lower is first means we control how
    # extensions are attached to readme names with those.
    PLAIN_EXTS = [
        # prefer no extension
        ('', 0),  # special case that renders READMES names without extension
        ('.text', 2), ('.TEXT', 2),
        ('.txt', 3), ('.TXT', 3)
    ]

    RST_EXTS = [
        ('.rst', 1), ('.rest', 1),
        ('.RST', 2), ('.REST', 2)
    ]

    MARKDOWN_EXTS = [
        ('.md', 1), ('.MD', 1),
        ('.mkdn', 2), ('.MKDN', 2),
        ('.mdown', 3), ('.MDOWN', 3),
        ('.markdown', 4), ('.MARKDOWN', 4)
    ]

    def _detect_renderer(self, source, filename=None):
        """
        runs detection of what renderer should be used for generating html
        from a markup language

        :param source: unused by the detection itself
        :param filename: file name matched against the extension patterns;
            ``None`` is treated as an empty name and yields the plain renderer
        :return: the matching renderer classmethod of ``MarkupRenderer``
        """
        # the default argument must not blow up inside the regex calls
        filename = filename or ''

        detectors = (
            (MarkupRenderer.MARKDOWN_PAT, 'markdown'),
            (MarkupRenderer.RST_PAT, 'rst'),
            (MarkupRenderer.JUPYTER_PAT, 'jupyter'),
        )
        # plain text is the fallback, this also covers extension-less readmes
        detected_renderer = 'plain'
        for pattern, renderer_name in detectors:
            if pattern.search(filename):
                detected_renderer = renderer_name
                break

        return getattr(MarkupRenderer, detected_renderer)

    @classmethod
    def renderer_from_filename(cls, filename, exclude):
        """
        Detect renderer markdown/rst from filename and optionally use exclude
        list to remove some options. This is mostly used in helpers.
        Returns None when no renderer can be detected.

        :param filename: file name checked against the known extensions
        :param exclude: list/tuple of extensions to ignore; any other value
            disables the filtering
        """
        def _filter(elements):
            if isinstance(exclude, (list, tuple)):
                return [x for x in elements if x not in exclude]
            return elements

        markdown_exts = _filter([x[0] for x in cls.MARKDOWN_EXTS if x[0]])
        if filename.endswith(tuple(markdown_exts)):
            return 'markdown'

        rst_exts = _filter([x[0] for x in cls.RST_EXTS if x[0]])
        if filename.endswith(tuple(rst_exts)):
            return 'rst'

        return None

    def render(self, source, filename=None):
        """
        Renders given source using the renderer detected from the filename.
        Unrecognized file names fall back to the plain renderer, which
        replaces new lines with <br />

        :param source: markup source to render
        :param filename: name used to detect the renderer
        """
        renderer = self._detect_renderer(source, filename)
        readme_data = renderer(source)
        return readme_data

    @classmethod
    def _flavored_markdown(cls, text):
        """
        Github style flavored markdown

        :param text: markdown source to pre-process
        """

        # Extract pre blocks.
        extractions = {}

        def pre_extraction_callback(matchobj):
            digest = md5_safe(matchobj.group(0))
            extractions[digest] = matchobj.group(0)
            return "{gfm-extraction-%s}" % digest
        pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
        text = re.sub(pattern, pre_extraction_callback, text)

        # Prevent foo_bar_baz from ending up with an italic word in the middle.
        def italic_callback(matchobj):
            s = matchobj.group(0)
            if list(s).count('_') >= 2:
                return s.replace('_', r'\_')
            return s
        text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

        # Insert pre block extractions.
        def pre_insert_callback(matchobj):
            return '\n\n' + extractions[matchobj.group(1)]
        text = re.sub(r'\{gfm-extraction-([0-9a-f]{32})\}',
                      pre_insert_callback, text)

        return text

    @classmethod
    def urlify_text(cls, text):
        """
        Wrap every http(s) url matched by ``URL_PAT`` in an anchor tag.

        :param text: text to scan for urls
        """
        def url_func(match_obj):
            url_full = match_obj.groups()[0]
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

        return cls.URL_PAT.sub(url_func, text)

    @classmethod
    def _highlight_mentions(cls, source):
        """
        Wrap each match of MENTIONS_REGEX in ``source`` as `` @name `` and
        strip the result; shared by the markdown and rst renderers.
        """
        mention_pat = re.compile(MENTIONS_REGEX)

        def wrapp(match_obj):
            uname = match_obj.groups()[0]
            return ' @%(uname)s ' % {'uname': uname}

        return mention_pat.sub(wrapp, source).strip()

    @classmethod
    def plain(cls, source, universal_newline=True):
        """
        Render source as plain text: urls become links and new lines
        become <br />.

        :param source: text to render
        :param universal_newline: normalize all line endings to ``\\n`` first
        """
        source = safe_unicode(source)
        if universal_newline:
            newline = '\n'
            source = newline.join(source.splitlines())

        source = cls.urlify_text(source)
        return '<br />' + source.replace("\n", '<br />')

    @classmethod
    def markdown(cls, source, safe=True, flavored=True, mentions=False):
        """
        Render Markdown source to html.

        :param source: markdown source
        :param safe: on rendering errors fall back to plain rendering
            instead of re-raising
        :param flavored: use the github flavored renderer and pre-processing
        :param mentions: wrap mentions found in the source before rendering
        """
        # It does not allow to insert inline HTML. In presence of HTML tags, it
        # will replace them instead with [HTML_REMOVED]. This is controlled by
        # the safe_mode=True parameter of the markdown method.

        if mentions:
            # we extracted mentions render with this using Mentions false
            return cls.markdown(cls._highlight_mentions(source), safe=safe,
                                flavored=flavored, mentions=False)

        if flavored:
            markdown_renderer = cls.markdown_renderer_flavored
        else:
            markdown_renderer = cls.markdown_renderer

        source = safe_unicode(source)
        try:
            if flavored:
                source = cls._flavored_markdown(source)
            return markdown_renderer.convert(source)
        except Exception:
            log.exception('Error when rendering Markdown')
            if safe:
                log.debug('Fallback to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def rst(cls, source, safe=True, mentions=False):
        """
        Render reStructuredText source to html.

        :param source: rst source
        :param safe: on rendering errors fall back to plain rendering
            instead of re-raising
        :param mentions: wrap mentions found in the source before rendering
        """
        if mentions:
            # we extracted mentions render with this using Mentions false
            return cls.rst(cls._highlight_mentions(source), safe=safe,
                           mentions=False)

        source = safe_unicode(source)
        try:
            docutils_settings = dict(
                [(alias, None) for alias in
                 cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])

            # register only the disallowed directives; the real settings
            # added below are not directive names and must not be
            # registered as such
            for alias in cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES:
                directives.register_directive(alias, None)

            docutils_settings.update({'input_encoding': 'unicode',
                                      'report_level': 4})

            parts = publish_parts(source=source,
                                  writer=RhodeCodeWriter(),
                                  settings_overrides=docutils_settings)

            return parts['html_title'] + parts["fragment"]
        except Exception:
            log.exception('Error when rendering RST')
            if safe:
                log.debug('Fallbacking to render in plain mode')
                return cls.plain(source)
            else:
                raise

    @classmethod
    def jupyter(cls, source, safe=True):
        """
        Render a Jupyter notebook (nbformat version 4) to html.

        :param source: notebook source, read with ``nbformat.reads``
        :param safe: replace ``application/javascript`` cell outputs with
            a plain text placeholder
        """
        from rhodecode.lib import helpers

        from traitlets.config import Config
        import nbformat
        from nbconvert import HTMLExporter
        from nbconvert.preprocessors import Preprocessor

        class CustomHTMLExporter(HTMLExporter):
            def _template_file_default(self):
                return 'basic'

        class Sandbox(Preprocessor):

            def preprocess(self, nb, resources):
                sandbox_text = 'SandBoxed(IPython.core.display.Javascript object)'
                for cell in nb['cells']:
                    if safe and 'outputs' in cell:
                        for cell_output in cell['outputs']:
                            if 'data' in cell_output:
                                if 'application/javascript' in cell_output['data']:
                                    cell_output['data']['text/plain'] = sandbox_text
                                    cell_output['data'].pop('application/javascript', None)
                return nb, resources

        def _sanitize_resources(resources):
            """
            Skip/sanitize some of the CSS generated and included in jupyter
            so it doesn't messes up UI so much
            """

            # TODO(marcink): probably we should replace this with whole custom
            # CSS set that doesn't screw up, but jupyter generated html has some
            # special markers, so it requires Custom HTML exporter template with
            # _default_template_path_default, to achieve that

            if not resources:
                return resources

            # strip the reset CSS, but only when the marker is present:
            # find() returns -1 otherwise and slicing from -1 would keep
            # just the last character of the stylesheet
            marker_pos = resources[0].find('/*! Source')
            if marker_pos != -1:
                resources[0] = resources[0][marker_pos:]
            return resources

        def as_html(notebook):
            conf = Config()
            conf.CustomHTMLExporter.preprocessors = [Sandbox]
            html_exporter = CustomHTMLExporter(config=conf)

            (body, resources) = html_exporter.from_notebook_node(notebook)
            header = '<!-- ## IPYTHON NOTEBOOK RENDERING ## -->'
            js = MakoTemplate(r'''
            <!-- Load mathjax -->
                <!-- MathJax configuration -->
                <script type="text/x-mathjax-config">
                MathJax.Hub.Config({
                    jax: ["input/TeX","output/HTML-CSS", "output/PreviewHTML"],
                    extensions: ["tex2jax.js","MathMenu.js","MathZoom.js", "fast-preview.js", "AssistiveMML.js", "[Contrib]/a11y/accessibility-menu.js"],
                    TeX: {
                        extensions: ["AMSmath.js","AMSsymbols.js","noErrors.js","noUndefined.js"]
                    },
                    tex2jax: {
                        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                        processEscapes: true,
                        processEnvironments: true
                    },
                    // Center justify equations in code and markdown cells. Elsewhere
                    // we use CSS to left justify single line equations in code cells.
                    displayAlign: 'center',
                    "HTML-CSS": {
                        styles: {'.MathJax_Display': {"margin": 0}},
                        linebreaks: { automatic: true },
                        availableFonts: ["STIX", "TeX"]
                    },
                    showMathMenu: false
                });
                </script>
                <!-- End of mathjax configuration -->
                <script src="${h.asset('js/src/math_jax/MathJax.js')}"></script>
            ''').render(h=helpers)

            css = '<style>{}</style>'.format(
                ''.join(_sanitize_resources(resources['inlining']['css'])))

            body = '\n'.join([header, css, js, body])
            return body, resources

        notebook = nbformat.reads(source, as_version=4)
        (body, resources) = as_html(notebook)
        return body

marcink project: added all source files and assets	r1
		class RstTemplateRenderer(object):

		def __init__(self):
		base = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
		rst_template_dirs = [os.path.join(base, 'templates', 'rst_templates')]
		self.template_store = TemplateLookup(
		directories=rst_template_dirs,
		input_encoding='utf-8',
		imports=['from rhodecode.lib import helpers as h'])

		def _get_template(self, templatename):
		return self.template_store.get_template(templatename)

		def render(self, template_name, **kwargs):
		template = self._get_template(template_name)
		return template.render(**kwargs)