##// END OF EJS Templates
Add argument parsing, and ability to convert an HTML file from command line
Anton I. Sipos -
Show More
@@ -1,32 +1,37 b''
1 1 ================================================================
2 2 nbconvert: conversion utilities for the IPython notebook format
3 3 ================================================================
4 4
5 5 Overview
6 6 ========
7 7
8 8 nbconvert provides command line utilities to convert to and from IPython
9 9 notebooks and standard formats:
10 10
11 11 - ReST
12 12 - Markdown
13 13 - HTML
14 14 - PDF
15 15 - Python script
16 16
17 17 As these tools mature, these utilities will be merged into IPython
18 18
19 19 Requirements
20 20 ============
21 21 The latest development version of docutils is required. This can be installed via
22 22 ::
23 23
24 24 $ curl http://docutils.svn.sourceforge.net/viewvc/docutils/trunk/docutils/?view=tar > docutils.gz
25 25 $ pip install -U docutils.gz
26 26
27 For conversion to HTML, pygments is also required
28 ::
29
30 $ pip install pygments
31
27 32 Running Tests
28 33 =============
29 34 ::
30 35
31 36 $ pip install nose
32 37 $ nosetests
@@ -1,253 +1,270 b''
1 1 #!/usr/bin/env python
2 2 """A really simple notebook to rst/html exporter.
3 3
4 4 Usage
5 5
6 6 ./nb2html.py file.ipynb
7 7
8 8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 9 called nb_figure_NN.png.
10 10
11 11 """
12 12
13 13 import os
14 14 import subprocess
15 15 import sys
16
16 import argparse
17 17 from IPython.nbformat import current as nbformat
18 18 from IPython.utils.text import indent
19 19
20 20
21 21 # Cell converters
22 22
23 23 def unknown_cell(cell):
24 24 """Default converter for cells of unknown type.
25 25 """
26 26
27 27 return rst_directive('.. warning:: Unknown cell') + \
28 28 [repr(cell)]
29 29
30 30
31 31 def rst_directive(directive, text=''):
32 32 out = [directive, '']
33 33 if text:
34 34 out.extend([indent(text), ''])
35 35 return out
36 36
37 37 # Converters for parts of a cell.
38 38
39
39 40 class ConversionException(Exception):
40 41 pass
41 42
42 43
43 44 class Converter(object):
44 45 default_encoding = 'utf-8'
45 46
46 def __init__(self, fname):
47 self.fname = fname
48 self.dirpath = os.path.dirname(fname)
47 def __init__(self, infile):
48 self.infile = infile
49 self.dirpath = os.path.dirname(infile)
49 50
50 51 @property
51 52 def extension(self):
52 53 raise ConversionException("""extension must be defined in Converter
53 54 subclass""")
54 55
55 56 def dispatch(self, cell_type):
56 57 """return cell_type dependent render method, for example render_code
57 58 """
58 59 return getattr(self, 'render_' + cell_type, unknown_cell)
59 60
60 61 def convert(self):
61 62 lines = []
62 63 for cell in self.nb.worksheets[0].cells:
63 64 conv_fn = self.dispatch(cell.cell_type)
64 65 lines.extend(conv_fn(cell))
65 66 lines.append('')
66 67 return '\n'.join(lines)
67 68
68 69 def render(self):
69 "read, convert, and save self.fname"
70 "read, convert, and save self.infile"
70 71 self.read()
71 72 self.output = self.convert()
72 73 return self.save()
73 74
74 75 def read(self):
75 76 "read and parse notebook into NotebookNode called self.nb"
76 with open(self.fname) as f:
77 with open(self.infile) as f:
77 78 self.nb = nbformat.read(f, 'json')
78 79
79 def save(self, fname=None, encoding=None):
80 def save(self, infile=None, encoding=None):
80 81 "encode self.output, write it to a file, and return the file name"
81 if fname is None:
82 fname = os.path.splitext(self.fname)[0] + '.' + self.extension
82 if infile is None:
83 infile = os.path.splitext(self.infile)[0] + '.' + self.extension
83 84 if encoding is None:
84 85 encoding = self.default_encoding
85 with open(fname, 'w') as f:
86 with open(infile, 'w') as f:
86 87 f.write(self.output.encode(encoding))
87 return fname
88 return infile
88 89
89 90 def render_heading(self, cell):
90 91 raise NotImplementedError
91 92
92 93 def render_code(self, cell):
93 94 raise NotImplementedError
94 95
95 96 def render_markdown(self, cell):
96 97 raise NotImplementedError
97 98
98 99 def render_pyout(self, cell):
99 100 raise NotImplementedError
100 101
101 102 def render_display_data(self, cell):
102 103 raise NotImplementedError
103 104
104 105 def render_stream(self, cell):
105 106 raise NotImplementedError
106 107
107 108
108 109 class ConverterRST(Converter):
109 110 extension = 'rst'
110 111 figures_counter = 0
111 112
112 113 def render_heading(self, cell):
113 114 """convert a heading cell to rst
114 115
115 116 Returns list."""
116 117 heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
117 118 marker = heading_level[cell.level]
118 119 return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]
119 120
120 121 def render_code(self, cell):
121 122 """Convert a code cell to rst
122 123
123 124 Returns list."""
124 125
125 126 if not cell.input:
126 127 return []
127 128
128 129 lines = ['In[%s]:' % cell.prompt_number, '']
129 130 lines.extend(rst_directive('.. code:: python', cell.input))
130 131
131 132 for output in cell.outputs:
132 133 conv_fn = self.dispatch(output.output_type)
133 134 lines.extend(conv_fn(output))
134 135
135 136 return lines
136 137
137 138 def render_markdown(self, cell):
138 139 """convert a markdown cell to rst
139 140
140 141 Returns list."""
141 142 return [cell.source]
142 143
143 144 def render_plaintext(self, cell):
144 145 """convert plain text to rst
145 146
146 147 Returns list."""
147 148 return [cell.source]
148 149
149 150 def render_pyout(self, output):
150 151 """convert pyout part of a code cell to rst
151 152
152 153 Returns list."""
153 154
154 155 lines = ['Out[%s]:' % output.prompt_number, '']
155 156
156 157 # output is a dictionary like object with type as a key
157 158 if 'latex' in output:
158 159 lines.extend(rst_directive('.. math::', output.latex))
159 160
160 161 if 'text' in output:
161 162 lines.extend(rst_directive('.. parsed-literal::', output.text))
162 163
163 164 return lines
164 165
165 166 def render_display_data(self, output):
166 167 """convert display data from the output of a code cell to rst.
167 168
168 169 Returns list.
169 170 """
170 171 lines = []
171 172
172 173 if 'png' in output:
173 fname = 'nb_figure_%s.png' % self.figures_counter
174 fullname = os.path.join(self.dirpath, fname)
174 infile = 'nb_figure_%s.png' % self.figures_counter
175 fullname = os.path.join(self.dirpath, infile)
175 176 with open(fullname, 'w') as f:
176 177 f.write(output.png.decode('base64'))
177 178
178 179 self.figures_counter += 1
179 lines.append('.. image:: %s' % fname)
180 lines.append('.. image:: %s' % infile)
180 181 lines.append('')
181 182
182 183 return lines
183 184
184 185 def render_stream(self, output):
185 186 """convert stream part of a code cell to rst
186 187
187 188 Returns list."""
188 189
189 190 lines = []
190 191
191 192 if 'text' in output:
192 193 lines.extend(rst_directive('.. parsed-literal::', output.text))
193 194
194 195 return lines
195 196
196 197
197 def rst2simplehtml(fname):
198 def rst2simplehtml(infile):
198 199 """Convert a rst file to simplified html suitable for blogger.
199 200
200 201 This just runs rst2html with certain parameters to produce really simple
201 202 html and strips the document header, so the resulting file can be easily
202 203 pasted into a blogger edit window.
203 204 """
204 205
205 206 # This is the template for the rst2html call that produces the cleanest,
206 207 # simplest html I could find. This should help in making it easier to
207 208 # paste into the blogspot html window, though I'm still having problems
208 209 # with linebreaks there...
209 210 cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "
210 211 "--no-generator --no-datestamp --no-source-link "
211 212 "--no-toc-backlinks --no-section-numbering "
212 213 "--strip-comments ")
213 214
214 cmd = "%s %s" % (cmd_template, fname)
215 cmd = "%s %s" % (cmd_template, infile)
215 216 proc = subprocess.Popen(cmd,
216 217 stdout=subprocess.PIPE,
217 218 stderr=subprocess.PIPE,
218 219 shell=True)
219 220 html, stderr = proc.communicate()
220 221 if stderr:
221 222 raise IOError(stderr)
222 223
223 224 # Make an iterator so breaking out holds state. Our implementation of
224 225 # searching for the html body below is basically a trivial little state
225 226 # machine, so we need this.
226 227 walker = iter(html.splitlines())
227 228
228 229 # Find start of main text, break out to then print until we find end /div.
229 230 # This may only work if there's a real title defined so we get a 'div class'
230 231 # tag, I haven't really tried.
231 232 for line in walker:
232 233 if line.startswith('<body>'):
233 234 break
234 235
235 newfname = os.path.splitext(fname)[0] + '.html'
236 newfname = os.path.splitext(infile)[0] + '.html'
236 237 with open(newfname, 'w') as f:
237 238 for line in walker:
238 239 if line.startswith('</body>'):
239 240 break
240 241 f.write(line)
241 242 f.write('\n')
242 243
243 244 return newfname
244 245
245 246
246 def main(fname):
247 def main(infile, format='rst'):
247 248 """Convert a notebook to html in one step"""
248 converter = ConverterRST(fname)
249 converter.render()
249 if format == 'rst':
250 converter = ConverterRST(infile)
251 converter.render()
252 elif format == 'html':
253 #Currently, conversion to html is a 2 step process, nb->rst->html
254 converter = ConverterRST(infile)
255 rstfname = converter.render()
256 rst2simplehtml(rstfname)
250 257
251 258
252 259 if __name__ == '__main__':
253 main(sys.argv[1])
260 parser = argparse.ArgumentParser(description='nbconvert: Convert IPython notebooks to other formats')
261
262 # TODO: consider passing file like object around, rather than filenames
263 # would allow us to process stdin, or even http streams
264 #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
265
266 #Require a filename as a positional argument
267 parser.add_argument('infile', nargs=1)
268 parser.add_argument('-f', '--format', default='rst')
269 args = parser.parse_args()
270 main(infile=args.infile[0], format=args.format)
@@ -1,57 +1,70 b''
1 1 from nbconvert import ConverterRST, main
2 2 import nose.tools as nt
3 3
4 4 import os
5 5 import glob
6 6 from IPython.nbformat import current as nbformat
7 7
8 8 fname = 'tests/test.ipynb'
9 9 out_fname = 'tests/test.rst'
10 10
11 11
12 12 def clean_dir():
13 13 "Remove .rst, .png and .html files created during conversion"
14 14 map(os.remove, glob.glob("./tests/*.rst"))
15 15 map(os.remove, glob.glob("./tests/*.png"))
16 map(os.remove, glob.glob("./tests/*.html"))
17
16 18
17 19 @nt.with_setup(clean_dir, clean_dir)
18 20 def test_simple():
19 21 c = ConverterRST(fname)
20 22 f = c.render()
21 23 nt.assert_true('rst' in f, 'changed file extension to rst')
22 24
25
23 26 @nt.with_setup(clean_dir, clean_dir)
24 27 def test_main():
25 28 """
26 29 Test main entry point
27 30 """
28 31 main(fname)
29 32 nt.assert_true(os.path.exists(out_fname))
30 33
34
31 35 def test_render_heading():
32 36 """ Unit test for cell type "heading" """
33 37 # Generate and test heading cells level 1-6
34 for level in xrange(1,7):
38 for level in xrange(1, 7):
35 39 cell = {
36 40 'cell_type': 'heading',
37 41 'level' : level,
38 'source' : ['Test for heading type H{0}'.format(level)]
42 'source' : ['Test for heading type H{0}'.format(level)]
39 43 }
40 44 # Convert cell dictionaries to NotebookNode
41 45 cell_nb = nbformat.NotebookNode(cell)
42 46 # Make sure "source" attribute is unicode, not a list.
43 47 # For some reason, creating a NotebookNode manually like
44 48 # this isn't converting source to a string like using
45 49 # the create-from-file routine.
46 50 if type(cell_nb.source) is list:
47 51 cell_nb.source = '\n'.join(cell_nb.source)
48 52 # Render to rst
49 53 c = ConverterRST('')
50 54 rst_list = c.render_heading(cell_nb)
51 nt.assert_true(isinstance(rst_list,list)) # render should return a list
55 nt.assert_true(isinstance(rst_list, list)) # render should return a list
52 56 rst_str = "".join(rst_list)
53 57 # Confirm rst content
54 heading_level = {1:'=', 2:'-', 3:'`', 4:'\'', 5:'.',6:'~'}
58 heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
55 59 chk_str = "Test for heading type H{0}\n{1}\n".format(
56 level,heading_level[level]*24)
57 nt.assert_equal(rst_str,chk_str)
60 level, heading_level[level] * 24)
61 nt.assert_equal(rst_str, chk_str)
62
63
64 @nt.with_setup(clean_dir, clean_dir)
65 def test_main_html():
66 """
67 Test main entry point with format='html'
68 """
69 main(fname, format='html')
70 nt.assert_true(os.path.exists('tests/test.html'))
General Comments 0
You need to be logged in to leave comments. Login now