"""Markdown filters This file contains a collection of utility filters for dealing with markdown within Jinja templates. """ # Copyright (c) IPython Development Team. # Distributed under the terms of the Modified BSD License. from __future__ import print_function # Stdlib imports import os import subprocess from io import TextIOWrapper, BytesIO import re import mistune from pygments import highlight from pygments.lexers import get_lexer_by_name from pygments.formatters import HtmlFormatter from pygments.util import ClassNotFound # IPython imports from IPython.nbconvert.filters.strings import add_anchor from IPython.nbconvert.utils.pandoc import pandoc from IPython.nbconvert.utils.exceptions import ConversionException from IPython.utils.decorators import undoc from IPython.utils.process import get_output_error_code from IPython.utils.py3compat import cast_bytes from IPython.utils.version import check_version marked = os.path.join(os.path.dirname(__file__), "marked.js") _node = None __all__ = [ 'markdown2html', 'markdown2html_pandoc', 'markdown2html_marked', 'markdown2html_mistune', 'markdown2latex', 'markdown2rst', ] class NodeJSMissing(ConversionException): """Exception raised when node.js is missing.""" pass def markdown2latex(source, markup='markdown', extra_args=None): """Convert a markdown string to LaTeX via pandoc. This function will raise an error if pandoc is not installed. Any error messages generated by pandoc are printed to stderr. Parameters ---------- source : string Input string, assumed to be valid markdown. markup : string Markup used by pandoc's reader default : pandoc extended markdown (see http://johnmacfarlane.net/pandoc/README.html#pandocs-markdown) Returns ------- out : string Output as returned by pandoc. """ return pandoc(source, markup, 'latex', extra_args=extra_args) @undoc class MathBlockGrammar(mistune.BlockGrammar): block_math = re.compile("^\$\$(.*?)\$\$", re.DOTALL) latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}", re.DOTALL) @undoc class MathBlockLexer(mistune.BlockLexer): default_features = ['block_math', 'latex_environment'] + mistune.BlockLexer.default_features def __init__(self, rules=None, **kwargs): if rules is None: rules = MathBlockGrammar() super(MathBlockLexer, self).__init__(rules, **kwargs) def parse_block_math(self, m): """Parse a $$math$$ block""" self.tokens.append({ 'type': 'block_math', 'text': m.group(1) }) def parse_latex_environment(self, m): self.tokens.append({ 'type': 'latex_environment', 'name': m.group(1), 'text': m.group(2) }) @undoc class MathInlineGrammar(mistune.InlineGrammar): math = re.compile("^\$(.+?)\$") text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)') @undoc class MathInlineLexer(mistune.InlineLexer): default_features = ['math'] + mistune.InlineLexer.default_features def __init__(self, renderer, rules=None, **kwargs): if rules is None: rules = MathInlineGrammar() super(MathInlineLexer, self).__init__(renderer, rules, **kwargs) def output_math(self, m): return self.renderer.inline_math(m.group(1)) @undoc class MarkdownWithMath(mistune.Markdown): def __init__(self, renderer, **kwargs): if 'inline' not in kwargs: kwargs['inline'] = MathInlineLexer if 'block' not in kwargs: kwargs['block'] = MathBlockLexer super(MarkdownWithMath, self).__init__(renderer, **kwargs) def parse_block_math(self): return self.renderer.block_math(self.token['text']) def parse_latex_environment(self): return self.renderer.latex_environment(self.token['name'], self.token['text']) @undoc class IPythonRenderer(mistune.Renderer): def block_code(self, code, lang): if lang: try: lexer = get_lexer_by_name(lang, stripall=True) except ClassNotFound: code = lang + '\n' + code lang = None if not lang: return '\n<pre><code>%s</code></pre>\n' % \ mistune.escape(code) formatter = HtmlFormatter() return highlight(code, lexer, formatter) def header(self, text, level, raw=None): html = super(IPythonRenderer, self).header(text, level, raw=raw) return add_anchor(html) # Pass math through unaltered - mathjax does the rendering in the browser def block_math(self, text): return '$$%s$$' % text def latex_environment(self, name, text): return r'\begin{%s}%s\end{%s}' % (name, text, name) def inline_math(self, text): return '$%s$' % text def markdown2html_mistune(source): """Convert a markdown string to HTML using mistune""" return MarkdownWithMath(renderer=IPythonRenderer()).render(source) def markdown2html_pandoc(source, extra_args=None): """Convert a markdown string to HTML via pandoc""" extra_args = extra_args or ['--mathjax'] return pandoc(source, 'markdown', 'html', extra_args=extra_args) def _find_nodejs(): global _node if _node is None: # prefer md2html via marked if node.js >= 0.9.12 is available # node is called nodejs on debian, so try that first _node = 'nodejs' if not _verify_node(_node): _node = 'node' return _node def markdown2html_marked(source, encoding='utf-8'): """Convert a markdown string to HTML via marked""" command = [_find_nodejs(), marked] try: p = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE ) except OSError as e: raise NodeJSMissing( "The command '%s' returned an error: %s.\n" % (" ".join(command), e) + "Please check that Node.js is installed." ) out, _ = p.communicate(cast_bytes(source, encoding)) out = TextIOWrapper(BytesIO(out), encoding, 'replace').read() return out.rstrip('\n') # The mistune renderer is the default, because it's simple to depend on it markdown2html = markdown2html_mistune def markdown2rst(source, extra_args=None): """Convert a markdown string to ReST via pandoc. This function will raise an error if pandoc is not installed. Any error messages generated by pandoc are printed to stderr. Parameters ---------- source : string Input string, assumed to be valid markdown. Returns ------- out : string Output as returned by pandoc. """ return pandoc(source, 'markdown', 'rst', extra_args=extra_args) def _verify_node(cmd): """Verify that the node command exists and is at least the minimum supported version of node. Parameters ---------- cmd : string Node command to verify (i.e 'node').""" try: out, err, return_code = get_output_error_code([cmd, '--version']) except OSError: # Command not found return False if return_code: # Command error return False return check_version(out.lstrip('v'), '0.9.12')