##// END OF EJS Templates
allow passing extra args to pandoc filters...
MinRK -
Show More
@@ -1,225 +1,226
1 """Markdown filters
1 """Markdown filters
2
2
3 This file contains a collection of utility filters for dealing with
3 This file contains a collection of utility filters for dealing with
4 markdown within Jinja templates.
4 markdown within Jinja templates.
5 """
5 """
6 # Copyright (c) IPython Development Team.
6 # Copyright (c) IPython Development Team.
7 # Distributed under the terms of the Modified BSD License.
7 # Distributed under the terms of the Modified BSD License.
8
8
9 from __future__ import print_function
9 from __future__ import print_function
10
10
11 # Stdlib imports
11 # Stdlib imports
12 import os
12 import os
13 import subprocess
13 import subprocess
14 from io import TextIOWrapper, BytesIO
14 from io import TextIOWrapper, BytesIO
15 import re
15 import re
16
16
17 import mistune
17 import mistune
18 from pygments import highlight
18 from pygments import highlight
19 from pygments.lexers import get_lexer_by_name
19 from pygments.lexers import get_lexer_by_name
20 from pygments.formatters import HtmlFormatter
20 from pygments.formatters import HtmlFormatter
21 from pygments.util import ClassNotFound
21 from pygments.util import ClassNotFound
22
22
23 # IPython imports
23 # IPython imports
24 from IPython.nbconvert.utils.pandoc import pandoc
24 from IPython.nbconvert.utils.pandoc import pandoc
25 from IPython.nbconvert.utils.exceptions import ConversionException
25 from IPython.nbconvert.utils.exceptions import ConversionException
26 from IPython.utils.decorators import undoc
26 from IPython.utils.decorators import undoc
27 from IPython.utils.process import get_output_error_code
27 from IPython.utils.process import get_output_error_code
28 from IPython.utils.py3compat import cast_bytes
28 from IPython.utils.py3compat import cast_bytes
29 from IPython.utils.version import check_version
29 from IPython.utils.version import check_version
30
30
31
31
32 marked = os.path.join(os.path.dirname(__file__), "marked.js")
32 marked = os.path.join(os.path.dirname(__file__), "marked.js")
33 _node = None
33 _node = None
34
34
35 __all__ = [
35 __all__ = [
36 'markdown2html',
36 'markdown2html',
37 'markdown2html_pandoc',
37 'markdown2html_pandoc',
38 'markdown2html_marked',
38 'markdown2html_marked',
39 'markdown2html_mistune',
39 'markdown2html_mistune',
40 'markdown2latex',
40 'markdown2latex',
41 'markdown2rst',
41 'markdown2rst',
42 ]
42 ]
43
43
44 class NodeJSMissing(ConversionException):
44 class NodeJSMissing(ConversionException):
45 """Exception raised when node.js is missing."""
45 """Exception raised when node.js is missing."""
46 pass
46 pass
47
47
48 def markdown2latex(source):
48 def markdown2latex(source, extra_args=None):
49 """Convert a markdown string to LaTeX via pandoc.
49 """Convert a markdown string to LaTeX via pandoc.
50
50
51 This function will raise an error if pandoc is not installed.
51 This function will raise an error if pandoc is not installed.
52 Any error messages generated by pandoc are printed to stderr.
52 Any error messages generated by pandoc are printed to stderr.
53
53
54 Parameters
54 Parameters
55 ----------
55 ----------
56 source : string
56 source : string
57 Input string, assumed to be valid markdown.
57 Input string, assumed to be valid markdown.
58
58
59 Returns
59 Returns
60 -------
60 -------
61 out : string
61 out : string
62 Output as returned by pandoc.
62 Output as returned by pandoc.
63 """
63 """
64 return pandoc(source, 'markdown', 'latex')
64 return pandoc(source, 'markdown', 'latex', extra_args=extra_args)
65
65
66
66
67 @undoc
67 @undoc
68 class MathBlockGrammar(mistune.BlockGrammar):
68 class MathBlockGrammar(mistune.BlockGrammar):
69 block_math = re.compile("^\$\$(.*?)\$\$", re.DOTALL)
69 block_math = re.compile("^\$\$(.*?)\$\$", re.DOTALL)
70 latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}",
70 latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}",
71 re.DOTALL)
71 re.DOTALL)
72
72
73 @undoc
73 @undoc
74 class MathBlockLexer(mistune.BlockLexer):
74 class MathBlockLexer(mistune.BlockLexer):
75 default_features = ['block_math', 'latex_environment'] + mistune.BlockLexer.default_features
75 default_features = ['block_math', 'latex_environment'] + mistune.BlockLexer.default_features
76
76
77 def __init__(self, rules=None, **kwargs):
77 def __init__(self, rules=None, **kwargs):
78 if rules is None:
78 if rules is None:
79 rules = MathBlockGrammar()
79 rules = MathBlockGrammar()
80 super(MathBlockLexer, self).__init__(rules, **kwargs)
80 super(MathBlockLexer, self).__init__(rules, **kwargs)
81
81
82 def parse_block_math(self, m):
82 def parse_block_math(self, m):
83 """Parse a $$math$$ block"""
83 """Parse a $$math$$ block"""
84 self.tokens.append({
84 self.tokens.append({
85 'type': 'block_math',
85 'type': 'block_math',
86 'text': m.group(1)
86 'text': m.group(1)
87 })
87 })
88
88
89 def parse_latex_environment(self, m):
89 def parse_latex_environment(self, m):
90 self.tokens.append({
90 self.tokens.append({
91 'type': 'latex_environment',
91 'type': 'latex_environment',
92 'name': m.group(1),
92 'name': m.group(1),
93 'text': m.group(2)
93 'text': m.group(2)
94 })
94 })
95
95
96 @undoc
96 @undoc
97 class MathInlineGrammar(mistune.InlineGrammar):
97 class MathInlineGrammar(mistune.InlineGrammar):
98 math = re.compile("^\$(.+?)\$")
98 math = re.compile("^\$(.+?)\$")
99
99
100 @undoc
100 @undoc
101 class MathInlineLexer(mistune.InlineLexer):
101 class MathInlineLexer(mistune.InlineLexer):
102 default_features = ['math'] + mistune.InlineLexer.default_features
102 default_features = ['math'] + mistune.InlineLexer.default_features
103
103
104 def __init__(self, renderer, rules=None, **kwargs):
104 def __init__(self, renderer, rules=None, **kwargs):
105 if rules is None:
105 if rules is None:
106 rules = MathInlineGrammar()
106 rules = MathInlineGrammar()
107 super(MathInlineLexer, self).__init__(renderer, rules, **kwargs)
107 super(MathInlineLexer, self).__init__(renderer, rules, **kwargs)
108
108
109 def output_math(self, m):
109 def output_math(self, m):
110 return self.renderer.inline_math(m.group(1))
110 return self.renderer.inline_math(m.group(1))
111
111
112 @undoc
112 @undoc
113 class MarkdownWithMath(mistune.Markdown):
113 class MarkdownWithMath(mistune.Markdown):
114 def __init__(self, renderer, **kwargs):
114 def __init__(self, renderer, **kwargs):
115 if 'inline' not in kwargs:
115 if 'inline' not in kwargs:
116 kwargs['inline'] = MathInlineLexer
116 kwargs['inline'] = MathInlineLexer
117 if 'block' not in kwargs:
117 if 'block' not in kwargs:
118 kwargs['block'] = MathBlockLexer
118 kwargs['block'] = MathBlockLexer
119 super(MarkdownWithMath, self).__init__(renderer, **kwargs)
119 super(MarkdownWithMath, self).__init__(renderer, **kwargs)
120
120
121 def parse_block_math(self):
121 def parse_block_math(self):
122 return self.renderer.block_math(self.token['text'])
122 return self.renderer.block_math(self.token['text'])
123
123
124 def parse_latex_environment(self):
124 def parse_latex_environment(self):
125 return self.renderer.latex_environment(self.token['name'], self.token['text'])
125 return self.renderer.latex_environment(self.token['name'], self.token['text'])
126
126
127 @undoc
127 @undoc
128 class IPythonRenderer(mistune.Renderer):
128 class IPythonRenderer(mistune.Renderer):
129 def block_code(self, code, lang):
129 def block_code(self, code, lang):
130 if lang:
130 if lang:
131 try:
131 try:
132 lexer = get_lexer_by_name(lang, stripall=True)
132 lexer = get_lexer_by_name(lang, stripall=True)
133 except ClassNotFound:
133 except ClassNotFound:
134 code = lang + '\n' + code
134 code = lang + '\n' + code
135 lang = None
135 lang = None
136
136
137 if not lang:
137 if not lang:
138 return '\n<pre><code>%s</code></pre>\n' % \
138 return '\n<pre><code>%s</code></pre>\n' % \
139 mistune.escape(code)
139 mistune.escape(code)
140
140
141 formatter = HtmlFormatter()
141 formatter = HtmlFormatter()
142 return highlight(code, lexer, formatter)
142 return highlight(code, lexer, formatter)
143
143
144 # Pass math through unaltered - mathjax does the rendering in the browser
144 # Pass math through unaltered - mathjax does the rendering in the browser
145 def block_math(self, text):
145 def block_math(self, text):
146 return '$$%s$$' % text
146 return '$$%s$$' % text
147
147
148 def latex_environment(self, name, text):
148 def latex_environment(self, name, text):
149 return r'\begin{%s}%s\end{%s}' % (name, text, name)
149 return r'\begin{%s}%s\end{%s}' % (name, text, name)
150
150
151 def inline_math(self, text):
151 def inline_math(self, text):
152 return '$%s$' % text
152 return '$%s$' % text
153
153
154 def markdown2html_mistune(source):
154 def markdown2html_mistune(source):
155 """Convert a markdown string to HTML using mistune"""
155 """Convert a markdown string to HTML using mistune"""
156 return MarkdownWithMath(renderer=IPythonRenderer()).render(source)
156 return MarkdownWithMath(renderer=IPythonRenderer()).render(source)
157
157
158 def markdown2html_pandoc(source):
158 def markdown2html_pandoc(source, extra_args=None):
159 """Convert a markdown string to HTML via pandoc"""
159 """Convert a markdown string to HTML via pandoc"""
160 return pandoc(source, 'markdown', 'html', extra_args=['--mathjax'])
160 extra_args = extra_args or ['--mathjax']
161 return pandoc(source, 'markdown', 'html', extra_args=extra_args)
161
162
162 def _find_nodejs():
163 def _find_nodejs():
163 global _node
164 global _node
164 if _node is None:
165 if _node is None:
165 # prefer md2html via marked if node.js >= 0.9.12 is available
166 # prefer md2html via marked if node.js >= 0.9.12 is available
166 # node is called nodejs on debian, so try that first
167 # node is called nodejs on debian, so try that first
167 _node = 'nodejs'
168 _node = 'nodejs'
168 if not _verify_node(_node):
169 if not _verify_node(_node):
169 _node = 'node'
170 _node = 'node'
170 return _node
171 return _node
171
172
172 def markdown2html_marked(source, encoding='utf-8'):
173 def markdown2html_marked(source, encoding='utf-8'):
173 """Convert a markdown string to HTML via marked"""
174 """Convert a markdown string to HTML via marked"""
174 command = [_find_nodejs(), marked]
175 command = [_find_nodejs(), marked]
175 try:
176 try:
176 p = subprocess.Popen(command,
177 p = subprocess.Popen(command,
177 stdin=subprocess.PIPE, stdout=subprocess.PIPE
178 stdin=subprocess.PIPE, stdout=subprocess.PIPE
178 )
179 )
179 except OSError as e:
180 except OSError as e:
180 raise NodeJSMissing(
181 raise NodeJSMissing(
181 "The command '%s' returned an error: %s.\n" % (" ".join(command), e) +
182 "The command '%s' returned an error: %s.\n" % (" ".join(command), e) +
182 "Please check that Node.js is installed."
183 "Please check that Node.js is installed."
183 )
184 )
184 out, _ = p.communicate(cast_bytes(source, encoding))
185 out, _ = p.communicate(cast_bytes(source, encoding))
185 out = TextIOWrapper(BytesIO(out), encoding, 'replace').read()
186 out = TextIOWrapper(BytesIO(out), encoding, 'replace').read()
186 return out.rstrip('\n')
187 return out.rstrip('\n')
187
188
188 # The mistune renderer is the default, because it's simple to depend on it
189 # The mistune renderer is the default, because it's simple to depend on it
189 markdown2html = markdown2html_mistune
190 markdown2html = markdown2html_mistune
190
191
191 def markdown2rst(source):
192 def markdown2rst(source, extra_args=None):
192 """Convert a markdown string to ReST via pandoc.
193 """Convert a markdown string to ReST via pandoc.
193
194
194 This function will raise an error if pandoc is not installed.
195 This function will raise an error if pandoc is not installed.
195 Any error messages generated by pandoc are printed to stderr.
196 Any error messages generated by pandoc are printed to stderr.
196
197
197 Parameters
198 Parameters
198 ----------
199 ----------
199 source : string
200 source : string
200 Input string, assumed to be valid markdown.
201 Input string, assumed to be valid markdown.
201
202
202 Returns
203 Returns
203 -------
204 -------
204 out : string
205 out : string
205 Output as returned by pandoc.
206 Output as returned by pandoc.
206 """
207 """
207 return pandoc(source, 'markdown', 'rst')
208 return pandoc(source, 'markdown', 'rst', extra_args=extra_args)
208
209
209 def _verify_node(cmd):
210 def _verify_node(cmd):
210 """Verify that the node command exists and is at least the minimum supported
211 """Verify that the node command exists and is at least the minimum supported
211 version of node.
212 version of node.
212
213
213 Parameters
214 Parameters
214 ----------
215 ----------
215 cmd : string
216 cmd : string
216 Node command to verify (e.g. 'node')."""
217 Node command to verify (e.g. 'node')."""
217 try:
218 try:
218 out, err, return_code = get_output_error_code([cmd, '--version'])
219 out, err, return_code = get_output_error_code([cmd, '--version'])
219 except OSError:
220 except OSError:
220 # Command not found
221 # Command not found
221 return False
222 return False
222 if return_code:
223 if return_code:
223 # Command error
224 # Command error
224 return False
225 return False
225 return check_version(out.lstrip('v'), '0.9.12')
226 return check_version(out.lstrip('v'), '0.9.12')
@@ -1,88 +1,109
1 """Tests for conversions from markdown to other formats"""
1 """Tests for conversions from markdown to other formats"""
2
2
3 # Copyright (c) IPython Development Team.
3 # Copyright (c) IPython Development Team.
4 # Distributed under the terms of the Modified BSD License.
4 # Distributed under the terms of the Modified BSD License.
5
5
6 from copy import copy
6 from copy import copy
7
7
8 from IPython.utils.py3compat import string_types
8 from IPython.utils.py3compat import string_types
9 from IPython.testing import decorators as dec
9 from IPython.testing import decorators as dec
10
10
11 from ...tests.base import TestsBase
11 from ...tests.base import TestsBase
12 from ..markdown import markdown2latex, markdown2html, markdown2rst
12 from ..markdown import markdown2latex, markdown2html, markdown2rst
13
13
14 from jinja2 import Environment
14
15
15 class TestMarkdown(TestsBase):
16 class TestMarkdown(TestsBase):
16
17
17 tests = [
18 tests = [
18 '*test',
19 '*test',
19 '**test',
20 '**test',
20 '*test*',
21 '*test*',
21 '_test_',
22 '_test_',
22 '__test__',
23 '__test__',
23 '__*test*__',
24 '__*test*__',
24 '**test**',
25 '**test**',
25 '#test',
26 '#test',
26 '##test',
27 '##test',
27 'test\n----',
28 'test\n----',
28 'test [link](https://google.com/)']
29 'test [link](https://google.com/)']
29
30
30 tokens = [
31 tokens = [
31 '*test',
32 '*test',
32 '**test',
33 '**test',
33 'test',
34 'test',
34 'test',
35 'test',
35 'test',
36 'test',
36 'test',
37 'test',
37 'test',
38 'test',
38 'test',
39 'test',
39 'test',
40 'test',
40 'test',
41 'test',
41 ('test', 'https://google.com/')]
42 ('test', 'https://google.com/')]
42
43
43
44
44 @dec.onlyif_cmds_exist('pandoc')
45 @dec.onlyif_cmds_exist('pandoc')
45 def test_markdown2latex(self):
46 def test_markdown2latex(self):
46 """markdown2latex test"""
47 """markdown2latex test"""
47 for index, test in enumerate(self.tests):
48 for index, test in enumerate(self.tests):
48 self._try_markdown(markdown2latex, test, self.tokens[index])
49 self._try_markdown(markdown2latex, test, self.tokens[index])
49
50
51 @dec.onlyif_cmds_exist('pandoc')
52 def test_pandoc_extra_args(self):
53 # pass extra pandoc args through the filters: --no-wrap for latex, --columns 5 for rst
54 s = '\n'.join([
55 "#latex {{long_line | md2l(['--no-wrap'])}}",
56 "#rst {{long_line | md2r(['--columns', '5'])}}",
57 ])
58 long_line = ' '.join(['long'] * 30)
59 env = Environment()
60 env.filters.update({
61 'md2l': markdown2latex,
62 'md2r': markdown2rst,
63 })
64 tpl = env.from_string(s)
65 rendered = tpl.render(long_line=long_line)
66 _, latex, rst = rendered.split('#')
67
68 self.assertEqual(latex.strip(), 'latex %s' % long_line)
69 self.assertEqual(rst.strip(), 'rst %s' % long_line.replace(' ', '\n'))
70
50 def test_markdown2html(self):
71 def test_markdown2html(self):
51 """markdown2html test"""
72 """markdown2html test"""
52 for index, test in enumerate(self.tests):
73 for index, test in enumerate(self.tests):
53 self._try_markdown(markdown2html, test, self.tokens[index])
74 self._try_markdown(markdown2html, test, self.tokens[index])
54
75
55 def test_markdown2html_math(self):
76 def test_markdown2html_math(self):
56 # Mathematical expressions should be passed through unaltered
77 # Mathematical expressions should be passed through unaltered
57 cases = [("\\begin{equation*}\n"
78 cases = [("\\begin{equation*}\n"
58 "\\left( \\sum_{k=1}^n a_k b_k \\right)^2 \\leq \\left( \\sum_{k=1}^n a_k^2 \\right) \\left( \\sum_{k=1}^n b_k^2 \\right)\n"
79 "\\left( \\sum_{k=1}^n a_k b_k \\right)^2 \\leq \\left( \\sum_{k=1}^n a_k^2 \\right) \\left( \\sum_{k=1}^n b_k^2 \\right)\n"
59 "\\end{equation*}"),
80 "\\end{equation*}"),
60 ("$$\n"
81 ("$$\n"
61 "a = 1 *3* 5\n"
82 "a = 1 *3* 5\n"
62 "$$"),
83 "$$"),
63 "$ a = 1 *3* 5 $",
84 "$ a = 1 *3* 5 $",
64 ]
85 ]
65 for case in cases:
86 for case in cases:
66 self.assertIn(case, markdown2html(case))
87 self.assertIn(case, markdown2html(case))
67
88
68
89
69 @dec.onlyif_cmds_exist('pandoc')
90 @dec.onlyif_cmds_exist('pandoc')
70 def test_markdown2rst(self):
91 def test_markdown2rst(self):
71 """markdown2rst test"""
92 """markdown2rst test"""
72
93
73 #Modify token array for rst, escape asterisk
94 #Modify token array for rst, escape asterisk
74 tokens = copy(self.tokens)
95 tokens = copy(self.tokens)
75 tokens[0] = r'\*test'
96 tokens[0] = r'\*test'
76 tokens[1] = r'\*\*test'
97 tokens[1] = r'\*\*test'
77
98
78 for index, test in enumerate(self.tests):
99 for index, test in enumerate(self.tests):
79 self._try_markdown(markdown2rst, test, tokens[index])
100 self._try_markdown(markdown2rst, test, tokens[index])
80
101
81
102
82 def _try_markdown(self, method, test, tokens):
103 def _try_markdown(self, method, test, tokens):
83 results = method(test)
104 results = method(test)
84 if isinstance(tokens, string_types):
105 if isinstance(tokens, string_types):
85 assert tokens in results
106 assert tokens in results
86 else:
107 else:
87 for token in tokens:
108 for token in tokens:
88 assert token in results
109 assert token in results
General Comments 0
You need to be logged in to leave comments. Login now