##// END OF EJS Templates
list known formats in help & on unknown fmt error...
Paul Ivanov -
Show More
@@ -1,383 +1,395 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """A really simple notebook to rst/html exporter.
2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
3
3
4 Usage
4 Example:
5
5 ./nbconvert.py --format html file.ipynb
6 ./nb2html.py file.ipynb
7
6
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
7 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 called nb_figure_NN.png.
8 called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
10
9 use '--format quick-html' which will do ipynb -> html, but won't look as
10 pretty.
11 """
11 """
12
12
13 import os
13 import os
14 import subprocess
14 import subprocess
15 import sys
15 import sys
16 from IPython.external import argparse
16 from IPython.external import argparse
17 from IPython.nbformat import current as nbformat
17 from IPython.nbformat import current as nbformat
18 from IPython.utils.text import indent
18 from IPython.utils.text import indent
19 from decorators import DocInherit
19 from decorators import DocInherit
20
20
21 # Cell converters
21 # Cell converters
22
22
def unknown_cell(cell):
    """Fallback converter for cells whose type has no dedicated renderer.

    Returns a list of lines: an RST ``warning`` directive followed by the
    ``repr`` of the cell that could not be converted.
    """
    warning_lines = rst_directive('.. warning:: Unknown cell')
    warning_lines.append(repr(cell))
    return warning_lines
29
29
30
30
def rst_directive(directive, text=''):
    """Build the output lines for an RST directive.

    The result always starts with the directive line and a blank line.
    When *text* is non-empty, the result of ``indent(text)`` and one more
    blank line are added after them.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
36
36
37 # Converters for parts of a cell.
37 # Converters for parts of a cell.
38
38
39
39
class ConversionException(Exception):
    """Raised by converters, e.g. when a subclass does not set ``extension``."""
42
42
43
43
class Converter(object):
    """Base class for notebook converters.

    A subclass sets ``extension`` and implements the ``render_*`` methods
    for the cell and output types it supports.  ``render()`` then reads the
    notebook, converts it and writes the result next to the input file.
    """
    # Encoding used by save() when the caller does not supply one.
    default_encoding = 'utf-8'

    def __init__(self, infile):
        """Remember the notebook path and the directory that contains it."""
        self.infile = infile
        self.dirpath = os.path.dirname(infile)

    @property
    def extension(self):
        """Extension (without the dot) of the file written by save().

        Subclasses override this with a plain class attribute.
        """
        raise ConversionException(
            "extension must be defined in Converter subclass")

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code
        """
        # XXX: unknown_cell here is RST specific - make it generic
        return getattr(self, 'render_' + cell_type, unknown_cell)

    def convert(self):
        """Convert the notebook in self.nb and return the result as a string.

        Only the cells of the first worksheet are converted.  Each cell's
        lines are followed by one blank line; the optional header and footer
        lines surround the cells.
        """
        lines = []
        lines.extend(self.optional_header())
        for cell in self.nb.worksheets[0].cells:
            conv_fn = self.dispatch(cell.cell_type)
            lines.extend(conv_fn(cell))
            lines.append('')
        lines.extend(self.optional_footer())
        return '\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Encode self.output and write it to disk.

        ``infile`` is, despite its name, the path of the file to write; it
        defaults to the notebook path with its extension replaced by
        ``self.extension``.  ``encoding`` defaults to
        ``self.default_encoding``.

        Returns the path that was written.
        """
        if infile is None:
            infile = os.path.splitext(self.infile)[0] + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The payload is already encoded bytes, so the file has to be opened
        # in binary mode (text mode rejects bytes on Python 3 and rewrites
        # newlines on Windows).
        with open(infile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return infile

    def optional_header(self):
        """Lines emitted before the first cell; none by default."""
        return []

    def optional_footer(self):
        """Lines emitted after the last cell; none by default."""
        return []

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, cell):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_display_data(self, cell):
        """convert display data from the output of a code cell

        Returns list.
        """
        raise NotImplementedError

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_plaintext(self, cell):
        """convert plain text

        Returns list."""
        raise NotImplementedError
141
141
142
142
class ConverterRST(Converter):
    """Converter that renders a notebook as reStructuredText."""
    extension = 'rst'
    # Number used to name the next extracted figure (nb_figure_<N>.png).
    figures_counter = 0
    # Underline character used for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        # Title line followed by an underline of the same length.
        marker = self.heading_level[cell.level]
        return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines.extend(rst_directive('.. code:: python', cell.input))

        # Each output is rendered by the method matching its output_type.
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [cell.source]

    @DocInherit
    def render_plaintext(self, cell):
        return [cell.source]

    @DocInherit
    def render_pyout(self, output):
        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.extend(rst_directive('.. math::', output.latex))

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    @DocInherit
    def render_display_data(self, output):
        # Imported here so the file's import block does not need to change.
        import base64

        lines = []

        if 'png' in output:
            # XXX: make the figures notebooks specific (i.e. self.infile) so
            # that multiple notebook conversions don't clobber each other's
            # figures
            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary, so the file is opened in binary mode; the
            # base64 module works on both Python 2 and Python 3.
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            lines.append('.. image:: %s' % infile)
            lines.append('')

        return lines

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines
212
215
class ConverterQuickHTML(Converter):
    """Converter that renders a notebook directly as simple HTML.

    NOTE(review): cell sources and outputs are inserted into the markup
    without HTML escaping, so content containing ``<`` or ``&`` may not
    display as written -- confirm whether escaping is wanted.
    """
    extension = 'html'
    # Number used to name the next extracted figure (nb_figure_<N>.png).
    figures_counter = 0

    def optional_header(self):
        # XXX: inject the IPython standard CSS into here
        return ['<html>', '<head>', '</head>', '', '<body>']

    def optional_footer(self):
        return ['</body>', '</html>']

    @DocInherit
    def render_heading(self, cell):
        marker = cell.level
        return ['<h{1}>\n {0}\n</h{1}>'.format(cell.source, marker)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        # One table per code cell: the first row holds the prompt and the
        # input, then one row follows for each output.
        lines = ['<table>']
        lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>' % cell.prompt_number)
        lines.append("<br>\n".join(cell.input.splitlines()))
        lines.append('</tt></td></tr>')

        for output in cell.outputs:
            lines.append('<tr><td></td><td>')
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))
            lines.append('</td></tr>')

        lines.append('</table>')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return ["<pre>"+cell.source+"</pre>"]

    @DocInherit
    def render_plaintext(self, cell):
        return ["<pre>"+cell.source+"</pre>"]

    @DocInherit
    def render_pyout(self, output):
        # render_code() already opens and closes the table cell around each
        # output, so only cell content is produced here; emitting extra
        # <tr>/<td> tags at this point left the table markup unbalanced.
        lines = ['<tt>Out[<b>%s</b>]:</tt>' % output.prompt_number]

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.append("<pre>")
            # append, not extend: extending a list with a string adds one
            # element per character.
            lines.append(indent(output.latex))
            lines.append("</pre>")

        if 'text' in output:
            lines.append("<pre>")
            lines.append(indent(output.text))
            lines.append("</pre>")

        return lines

    @DocInherit
    def render_display_data(self, output):
        # Imported here so the file's import block does not need to change.
        import base64

        lines = []

        if 'png' in output:
            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary, so the file is opened in binary mode; the
            # base64 module works on both Python 2 and Python 3.
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            lines.append('<img src="%s">' % infile)
            lines.append('')

        return lines

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.append(output.text)

        return lines
306
309
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    The lines found between the ``<body>`` and ``</body>`` lines of the
    rst2html output are written to a file named like *infile* but with an
    ``.html`` extension.

    Returns the name of the html file written.

    Raises IOError when rst2html cannot be started, writes anything to
    stderr, or exits with a non-zero status.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste into
    # the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is passed as an argument list rather than a shell string,
    # so file names containing spaces or shell metacharacters reach rst2html
    # unchanged.
    cmd = ["rst2html", "--link-stylesheet", "--no-xml-declaration",
           "--no-generator", "--no-datestamp", "--no-source-link",
           "--no-toc-backlinks", "--no-section-numbering",
           "--strip-comments", infile]
    try:
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                # text output, so the str comparisons below
                                # also work on Python 3
                                universal_newlines=True)
    except OSError as err:
        # Without a shell a missing executable surfaces as OSError; keep
        # reporting it as IOError like every other rst2html failure.
        raise IOError("could not run rst2html: %s" % err)
    html, stderr = proc.communicate()
    if stderr or proc.returncode:
        raise IOError(stderr or
                      "rst2html exited with status %s" % proc.returncode)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find end /div.
    # This may only work if there's a real title defined so we get a 'div class'
    # tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        # The walker resumes right after the <body> line found above.
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
354
357
known_formats = "rst (default), html, quick-html"


def main(infile, format='rst'):
    """Convert the notebook *infile* to the requested output *format*.

    Supported formats:

    - ``'rst'``: write reStructuredText with ConverterRST.
    - ``'html'``: write reStructuredText first, then convert that file to
      html with rst2simplehtml().
    - ``'quick-html'``: write html directly with ConverterQuickHTML.

    Raises SystemExit with a message listing ``known_formats`` for any other
    format.
    """
    # XXX: this is just quick and dirty for now. When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all else, as well as in the help
    if format == 'rst':
        ConverterRST(infile).render()
    elif format == 'html':
        # Currently, conversion to html is a 2 step process, nb->rst->html
        rstfname = ConverterRST(infile).render()
        rst2simplehtml(rstfname)
    elif format == 'quick-html':
        ConverterQuickHTML(infile).render()
    else:
        raise SystemExit("Unknown format '%s', " % format +
                         "known formats are: " + known_formats)
369
379
370
380
if __name__ == '__main__':
    # Command-line entry point: read the notebook path and the output format
    # from the arguments, then hand both to main().  The module docstring is
    # used as the help description and shown without re-wrapping.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    #Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    format_help = 'Output format. Supported formats: \n' + known_formats
    parser.add_argument('-f', '--format', default='rst', help=format_help)
    options = parser.parse_args()
    # nargs=1 stores the positional argument as a one-element list.
    main(infile=options.infile[0], format=options.format)
General Comments 0
You need to be logged in to leave comments. Login now