##// END OF EJS Templates
Add argument parsing, and ability to convert an HTML file from command line
Anton I. Sipos -
Show More
@@ -1,32 +1,37 b''
1 ================================================================
1 ================================================================
2 nbconvert: conversion utilities for the IPython notebook format
2 nbconvert: conversion utilities for the IPython notebook format
3 ================================================================
3 ================================================================
4
4
5 Overview
5 Overview
6 ========
6 ========
7
7
8 nbconvert provides command line utilities to convert to and from IPython
8 nbconvert provides command line utilities to convert to and from IPython
9 notebooks and standard formats:
9 notebooks and standard formats:
10
10
11 - ReST
11 - ReST
12 - Markdown
12 - Markdown
13 - HTML
13 - HTML
14 - PDF
14 - PDF
15 - Python script
15 - Python script
16
16
17 As these tools mature, these utilities will be merged into IPython
17 As these tools mature, these utilities will be merged into IPython
18
18
19 Requirements
19 Requirements
20 ============
20 ============
21 The latest development version of docutils is required. This can be installed via
21 The latest development version of docutils is required. This can be installed via
22 ::
22 ::
23
23
24 $ curl http://docutils.svn.sourceforge.net/viewvc/docutils/trunk/docutils/?view=tar > docutils.gz
24 $ curl http://docutils.svn.sourceforge.net/viewvc/docutils/trunk/docutils/?view=tar > docutils.gz
25 $ pip install -U docutils.gz
25 $ pip install -U docutils.gz
26
26
27 For conversion to HTML, pygments is also required
28 ::
29
30 $ pip install pygments
31
27 Running Tests
32 Running Tests
28 =============
33 =============
29 ::
34 ::
30
35
31 $ pip install nose
36 $ pip install nose
32 $ nosetests
37 $ nosetests
@@ -1,253 +1,270 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """A really simple notebook to rst/html exporter.
2 """A really simple notebook to rst/html exporter.
3
3
4 Usage
4 Usage
5
5
6 ./nb2html.py file.ipynb
6 ./nb2html.py file.ipynb
7
7
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 called nb_figure_NN.png.
9 called nb_figure_NN.png.
10
10
11 """
11 """
12
12
13 import os
13 import os
14 import subprocess
14 import subprocess
15 import sys
15 import sys
16
16 import argparse
17 from IPython.nbformat import current as nbformat
17 from IPython.nbformat import current as nbformat
18 from IPython.utils.text import indent
18 from IPython.utils.text import indent
19
19
20
20
21 # Cell converters
21 # Cell converters
22
22
23 def unknown_cell(cell):
23 def unknown_cell(cell):
24 """Default converter for cells of unknown type.
24 """Default converter for cells of unknown type.
25 """
25 """
26
26
27 return rst_directive('.. warning:: Unknown cell') + \
27 return rst_directive('.. warning:: Unknown cell') + \
28 [repr(cell)]
28 [repr(cell)]
29
29
30
30
31 def rst_directive(directive, text=''):
31 def rst_directive(directive, text=''):
32 out = [directive, '']
32 out = [directive, '']
33 if text:
33 if text:
34 out.extend([indent(text), ''])
34 out.extend([indent(text), ''])
35 return out
35 return out
36
36
37 # Converters for parts of a cell.
37 # Converters for parts of a cell.
38
38
39
39 class ConversionException(Exception):
40 class ConversionException(Exception):
40 pass
41 pass
41
42
42
43
43 class Converter(object):
44 class Converter(object):
44 default_encoding = 'utf-8'
45 default_encoding = 'utf-8'
45
46
46 def __init__(self, fname):
47 def __init__(self, infile):
47 self.fname = fname
48 self.infile = infile
48 self.dirpath = os.path.dirname(fname)
49 self.dirpath = os.path.dirname(infile)
49
50
50 @property
51 @property
51 def extension(self):
52 def extension(self):
52 raise ConversionException("""extension must be defined in Converter
53 raise ConversionException("""extension must be defined in Converter
53 subclass""")
54 subclass""")
54
55
55 def dispatch(self, cell_type):
56 def dispatch(self, cell_type):
56 """return cell_type dependent render method, for example render_code
57 """return cell_type dependent render method, for example render_code
57 """
58 """
58 return getattr(self, 'render_' + cell_type, unknown_cell)
59 return getattr(self, 'render_' + cell_type, unknown_cell)
59
60
60 def convert(self):
61 def convert(self):
61 lines = []
62 lines = []
62 for cell in self.nb.worksheets[0].cells:
63 for cell in self.nb.worksheets[0].cells:
63 conv_fn = self.dispatch(cell.cell_type)
64 conv_fn = self.dispatch(cell.cell_type)
64 lines.extend(conv_fn(cell))
65 lines.extend(conv_fn(cell))
65 lines.append('')
66 lines.append('')
66 return '\n'.join(lines)
67 return '\n'.join(lines)
67
68
68 def render(self):
69 def render(self):
69 "read, convert, and save self.fname"
70 "read, convert, and save self.infile"
70 self.read()
71 self.read()
71 self.output = self.convert()
72 self.output = self.convert()
72 return self.save()
73 return self.save()
73
74
74 def read(self):
75 def read(self):
75 "read and parse notebook into NotebookNode called self.nb"
76 "read and parse notebook into NotebookNode called self.nb"
76 with open(self.fname) as f:
77 with open(self.infile) as f:
77 self.nb = nbformat.read(f, 'json')
78 self.nb = nbformat.read(f, 'json')
78
79
79 def save(self, fname=None, encoding=None):
80 def save(self, infile=None, encoding=None):
80 "encode self.output, write it to a file, and return the file name"
81 "encode self.output, write it to a file, and return the file name"
81 if fname is None:
82 if infile is None:
82 fname = os.path.splitext(self.fname)[0] + '.' + self.extension
83 infile = os.path.splitext(self.infile)[0] + '.' + self.extension
83 if encoding is None:
84 if encoding is None:
84 encoding = self.default_encoding
85 encoding = self.default_encoding
85 with open(fname, 'w') as f:
86 with open(infile, 'w') as f:
86 f.write(self.output.encode(encoding))
87 f.write(self.output.encode(encoding))
87 return fname
88 return infile
88
89
89 def render_heading(self, cell):
90 def render_heading(self, cell):
90 raise NotImplementedError
91 raise NotImplementedError
91
92
92 def render_code(self, cell):
93 def render_code(self, cell):
93 raise NotImplementedError
94 raise NotImplementedError
94
95
95 def render_markdown(self, cell):
96 def render_markdown(self, cell):
96 raise NotImplementedError
97 raise NotImplementedError
97
98
98 def render_pyout(self, cell):
99 def render_pyout(self, cell):
99 raise NotImplementedError
100 raise NotImplementedError
100
101
101 def render_display_data(self, cell):
102 def render_display_data(self, cell):
102 raise NotImplementedError
103 raise NotImplementedError
103
104
104 def render_stream(self, cell):
105 def render_stream(self, cell):
105 raise NotImplementedError
106 raise NotImplementedError
106
107
107
108
108 class ConverterRST(Converter):
109 class ConverterRST(Converter):
109 extension = 'rst'
110 extension = 'rst'
110 figures_counter = 0
111 figures_counter = 0
111
112
112 def render_heading(self, cell):
113 def render_heading(self, cell):
113 """convert a heading cell to rst
114 """convert a heading cell to rst
114
115
115 Returns list."""
116 Returns list."""
116 heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
117 heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
117 marker = heading_level[cell.level]
118 marker = heading_level[cell.level]
118 return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]
119 return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]
119
120
120 def render_code(self, cell):
121 def render_code(self, cell):
121 """Convert a code cell to rst
122 """Convert a code cell to rst
122
123
123 Returns list."""
124 Returns list."""
124
125
125 if not cell.input:
126 if not cell.input:
126 return []
127 return []
127
128
128 lines = ['In[%s]:' % cell.prompt_number, '']
129 lines = ['In[%s]:' % cell.prompt_number, '']
129 lines.extend(rst_directive('.. code:: python', cell.input))
130 lines.extend(rst_directive('.. code:: python', cell.input))
130
131
131 for output in cell.outputs:
132 for output in cell.outputs:
132 conv_fn = self.dispatch(output.output_type)
133 conv_fn = self.dispatch(output.output_type)
133 lines.extend(conv_fn(output))
134 lines.extend(conv_fn(output))
134
135
135 return lines
136 return lines
136
137
137 def render_markdown(self, cell):
138 def render_markdown(self, cell):
138 """convert a markdown cell to rst
139 """convert a markdown cell to rst
139
140
140 Returns list."""
141 Returns list."""
141 return [cell.source]
142 return [cell.source]
142
143
143 def render_plaintext(self, cell):
144 def render_plaintext(self, cell):
144 """convert plain text to rst
145 """convert plain text to rst
145
146
146 Returns list."""
147 Returns list."""
147 return [cell.source]
148 return [cell.source]
148
149
149 def render_pyout(self, output):
150 def render_pyout(self, output):
150 """convert pyout part of a code cell to rst
151 """convert pyout part of a code cell to rst
151
152
152 Returns list."""
153 Returns list."""
153
154
154 lines = ['Out[%s]:' % output.prompt_number, '']
155 lines = ['Out[%s]:' % output.prompt_number, '']
155
156
156 # output is a dictionary like object with type as a key
157 # output is a dictionary like object with type as a key
157 if 'latex' in output:
158 if 'latex' in output:
158 lines.extend(rst_directive('.. math::', output.latex))
159 lines.extend(rst_directive('.. math::', output.latex))
159
160
160 if 'text' in output:
161 if 'text' in output:
161 lines.extend(rst_directive('.. parsed-literal::', output.text))
162 lines.extend(rst_directive('.. parsed-literal::', output.text))
162
163
163 return lines
164 return lines
164
165
165 def render_display_data(self, output):
166 def render_display_data(self, output):
166 """convert display data from the output of a code cell to rst.
167 """convert display data from the output of a code cell to rst.
167
168
168 Returns list.
169 Returns list.
169 """
170 """
170 lines = []
171 lines = []
171
172
172 if 'png' in output:
173 if 'png' in output:
173 fname = 'nb_figure_%s.png' % self.figures_counter
174 infile = 'nb_figure_%s.png' % self.figures_counter
174 fullname = os.path.join(self.dirpath, fname)
175 fullname = os.path.join(self.dirpath, infile)
175 with open(fullname, 'w') as f:
176 with open(fullname, 'w') as f:
176 f.write(output.png.decode('base64'))
177 f.write(output.png.decode('base64'))
177
178
178 self.figures_counter += 1
179 self.figures_counter += 1
179 lines.append('.. image:: %s' % fname)
180 lines.append('.. image:: %s' % infile)
180 lines.append('')
181 lines.append('')
181
182
182 return lines
183 return lines
183
184
184 def render_stream(self, output):
185 def render_stream(self, output):
185 """convert stream part of a code cell to rst
186 """convert stream part of a code cell to rst
186
187
187 Returns list."""
188 Returns list."""
188
189
189 lines = []
190 lines = []
190
191
191 if 'text' in output:
192 if 'text' in output:
192 lines.extend(rst_directive('.. parsed-literal::', output.text))
193 lines.extend(rst_directive('.. parsed-literal::', output.text))
193
194
194 return lines
195 return lines
195
196
196
197
197 def rst2simplehtml(fname):
198 def rst2simplehtml(infile):
198 """Convert a rst file to simplified html suitable for blogger.
199 """Convert a rst file to simplified html suitable for blogger.
199
200
200 This just runs rst2html with certain parameters to produce really simple
201 This just runs rst2html with certain parameters to produce really simple
201 html and strips the document header, so the resulting file can be easily
202 html and strips the document header, so the resulting file can be easily
202 pasted into a blogger edit window.
203 pasted into a blogger edit window.
203 """
204 """
204
205
205 # This is the template for the rst2html call that produces the cleanest,
206 # This is the template for the rst2html call that produces the cleanest,
206 # simplest html I could find. This should help in making it easier to
207 # simplest html I could find. This should help in making it easier to
207 # paste into the blogspot html window, though I'm still having problems
208 # paste into the blogspot html window, though I'm still having problems
208 # with linebreaks there...
209 # with linebreaks there...
209 cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "
210 cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "
210 "--no-generator --no-datestamp --no-source-link "
211 "--no-generator --no-datestamp --no-source-link "
211 "--no-toc-backlinks --no-section-numbering "
212 "--no-toc-backlinks --no-section-numbering "
212 "--strip-comments ")
213 "--strip-comments ")
213
214
214 cmd = "%s %s" % (cmd_template, fname)
215 cmd = "%s %s" % (cmd_template, infile)
215 proc = subprocess.Popen(cmd,
216 proc = subprocess.Popen(cmd,
216 stdout=subprocess.PIPE,
217 stdout=subprocess.PIPE,
217 stderr=subprocess.PIPE,
218 stderr=subprocess.PIPE,
218 shell=True)
219 shell=True)
219 html, stderr = proc.communicate()
220 html, stderr = proc.communicate()
220 if stderr:
221 if stderr:
221 raise IOError(stderr)
222 raise IOError(stderr)
222
223
223 # Make an iterator so breaking out holds state. Our implementation of
224 # Make an iterator so breaking out holds state. Our implementation of
224 # searching for the html body below is basically a trivial little state
225 # searching for the html body below is basically a trivial little state
225 # machine, so we need this.
226 # machine, so we need this.
226 walker = iter(html.splitlines())
227 walker = iter(html.splitlines())
227
228
228 # Find start of main text, break out to then print until we find end /div.
229 # Find start of main text, break out to then print until we find end /div.
229 # This may only work if there's a real title defined so we get a 'div class'
230 # This may only work if there's a real title defined so we get a 'div class'
230 # tag, I haven't really tried.
231 # tag, I haven't really tried.
231 for line in walker:
232 for line in walker:
232 if line.startswith('<body>'):
233 if line.startswith('<body>'):
233 break
234 break
234
235
235 newfname = os.path.splitext(fname)[0] + '.html'
236 newfname = os.path.splitext(infile)[0] + '.html'
236 with open(newfname, 'w') as f:
237 with open(newfname, 'w') as f:
237 for line in walker:
238 for line in walker:
238 if line.startswith('</body>'):
239 if line.startswith('</body>'):
239 break
240 break
240 f.write(line)
241 f.write(line)
241 f.write('\n')
242 f.write('\n')
242
243
243 return newfname
244 return newfname
244
245
245
246
246 def main(fname):
247 def main(infile, format='rst'):
247 """Convert a notebook to html in one step"""
248 """Convert a notebook to html in one step"""
248 converter = ConverterRST(fname)
249 if format == 'rst':
249 converter.render()
250 converter = ConverterRST(infile)
251 converter.render()
252 elif format == 'html':
253 #Currently, conversion to html is a 2 step process, nb->rst->html
254 converter = ConverterRST(infile)
255 rstfname = converter.render()
256 rst2simplehtml(rstfname)
250
257
251
258
252 if __name__ == '__main__':
259 if __name__ == '__main__':
253 main(sys.argv[1])
260 parser = argparse.ArgumentParser(description='nbconvert: Convert IPython notebooks to other formats')
261
262 # TODO: consider passing file like object around, rather than filenames
263 # would allow us to process stdin, or even http streams
264 #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
265
266 #Require a filename as a positional argument
267 parser.add_argument('infile', nargs=1)
268 parser.add_argument('-f', '--format', default='rst')
269 args = parser.parse_args()
270 main(infile=args.infile[0], format=args.format)
@@ -1,57 +1,70 b''
1 from nbconvert import ConverterRST, main
1 from nbconvert import ConverterRST, main
2 import nose.tools as nt
2 import nose.tools as nt
3
3
4 import os
4 import os
5 import glob
5 import glob
6 from IPython.nbformat import current as nbformat
6 from IPython.nbformat import current as nbformat
7
7
8 fname = 'tests/test.ipynb'
8 fname = 'tests/test.ipynb'
9 out_fname = 'tests/test.rst'
9 out_fname = 'tests/test.rst'
10
10
11
11
12 def clean_dir():
12 def clean_dir():
13 "Remove .rst files created during conversion"
13 "Remove .rst files created during conversion"
14 map(os.remove, glob.glob("./tests/*.rst"))
14 map(os.remove, glob.glob("./tests/*.rst"))
15 map(os.remove, glob.glob("./tests/*.png"))
15 map(os.remove, glob.glob("./tests/*.png"))
16 map(os.remove, glob.glob("./tests/*.html"))
17
16
18
17 @nt.with_setup(clean_dir, clean_dir)
19 @nt.with_setup(clean_dir, clean_dir)
18 def test_simple():
20 def test_simple():
19 c = ConverterRST(fname)
21 c = ConverterRST(fname)
20 f = c.render()
22 f = c.render()
21 nt.assert_true('rst' in f, 'changed file extension to rst')
23 nt.assert_true('rst' in f, 'changed file extension to rst')
22
24
25
23 @nt.with_setup(clean_dir, clean_dir)
26 @nt.with_setup(clean_dir, clean_dir)
24 def test_main():
27 def test_main():
25 """
28 """
26 Test main entry point
29 Test main entry point
27 """
30 """
28 main(fname)
31 main(fname)
29 nt.assert_true(os.path.exists(out_fname))
32 nt.assert_true(os.path.exists(out_fname))
30
33
34
31 def test_render_heading():
35 def test_render_heading():
32 """ Unit test for cell type "heading" """
36 """ Unit test for cell type "heading" """
33 # Generate and test heading cells level 1-6
37 # Generate and test heading cells level 1-6
34 for level in xrange(1,7):
38 for level in xrange(1, 7):
35 cell = {
39 cell = {
36 'cell_type': 'heading',
40 'cell_type': 'heading',
37 'level' : level,
41 'level' : level,
38 'source' : ['Test for heading type H{0}'.format(level)]
42 'source' : ['Test for heading type H{0}'.format(level)]
39 }
43 }
40 # Convert cell dictionaries to NotebookNode
44 # Convert cell dictionaries to NotebookNode
41 cell_nb = nbformat.NotebookNode(cell)
45 cell_nb = nbformat.NotebookNode(cell)
42 # Make sure the "source" attribute is unicode, not a list.
46 # Make sure the "source" attribute is unicode, not a list.
43 # For some reason, creating a NotebookNode manually like
47 # For some reason, creating a NotebookNode manually like
44 # this isn't converting source to a string like using
48 # this isn't converting source to a string like using
45 # the create-from-file routine.
49 # the create-from-file routine.
46 if type(cell_nb.source) is list:
50 if type(cell_nb.source) is list:
47 cell_nb.source = '\n'.join(cell_nb.source)
51 cell_nb.source = '\n'.join(cell_nb.source)
48 # Render to rst
52 # Render to rst
49 c = ConverterRST('')
53 c = ConverterRST('')
50 rst_list = c.render_heading(cell_nb)
54 rst_list = c.render_heading(cell_nb)
51 nt.assert_true(isinstance(rst_list,list)) # render should return a list
55 nt.assert_true(isinstance(rst_list, list)) # render should return a list
52 rst_str = "".join(rst_list)
56 rst_str = "".join(rst_list)
53 # Confirm rst content
57 # Confirm rst content
54 heading_level = {1:'=', 2:'-', 3:'`', 4:'\'', 5:'.',6:'~'}
58 heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
55 chk_str = "Test for heading type H{0}\n{1}\n".format(
59 chk_str = "Test for heading type H{0}\n{1}\n".format(
56 level,heading_level[level]*24)
60 level, heading_level[level] * 24)
57 nt.assert_equal(rst_str,chk_str)
61 nt.assert_equal(rst_str, chk_str)
62
63
64 @nt.with_setup(clean_dir, clean_dir)
65 def test_main_html():
66 """
67 Test main entry point with format='html'
68 """
69 main(fname, format='html')
70 nt.assert_true(os.path.exists('tests/test.html'))
General Comments 0
You need to be logged in to leave comments. Login now