##// END OF EJS Templates
closes #7
Paul Ivanov -
Show More
@@ -1,383 +1,383 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """A really simple notebook to rst/html exporter.
2 """A really simple notebook to rst/html exporter.
3
3
4 Usage
4 Usage
5
5
6 ./nb2html.py file.ipynb
6 ./nb2html.py file.ipynb
7
7
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 called nb_figure_NN.png.
9 called nb_figure_NN.png.
10
10
11 """
11 """
12
12
13 import os
13 import os
14 import subprocess
14 import subprocess
15 import sys
15 import sys
16 from IPython.external import argparse
16 from IPython.external import argparse
17 from IPython.nbformat import current as nbformat
17 from IPython.nbformat import current as nbformat
18 from IPython.utils.text import indent
18 from IPython.utils.text import indent
19 from decorators import DocInherit
19 from decorators import DocInherit
20
20
21 # Cell converters
21 # Cell converters
22
22
def unknown_cell(cell):
    """Fallback converter used when a cell type has no dedicated renderer.

    Emits an RST ``warning`` directive followed by the ``repr`` of the cell,
    so the unconverted cell stays visible in the generated document.

    Returns list.
    """
    rendered = rst_directive('.. warning:: Unknown cell')
    rendered.append(repr(cell))
    return rendered
29
29
30
30
def rst_directive(directive, text=''):
    """Build the output lines for an RST directive.

    The directive line is always followed by an empty line.  When *text* is
    non-empty it is added as the directive body, indented with ``indent``,
    and followed by another empty line.

    Returns list.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
36
36
37 # Converters for parts of a cell.
37 # Converters for parts of a cell.
38
38
39
39
class ConversionException(Exception):
    """Error raised by the converters in this module.

    ``Converter.extension`` raises it when a subclass does not define an
    output extension.
    """
42
42
43
43
class Converter(object):
    """Base class for notebook converters.

    A subclass sets ``extension`` and implements the ``render_<type>``
    methods.  ``render`` then reads the notebook, converts every cell of its
    first worksheet and writes the result next to the input file.
    """
    default_encoding = 'utf-8'

    def __init__(self, infile):
        """Store the notebook path *infile* and the directory containing it."""
        self.infile = infile
        self.dirpath = os.path.dirname(infile)

    @property
    def extension(self):
        """Output file extension, without the leading dot.

        Subclasses override this (typically with a class attribute); the
        base implementation always raises ConversionException.
        """
        raise ConversionException("""extension must be defined in Converter
        subclass""")

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code
        """
        # XXX: unknown_cell here is RST specific - make it generic
        return getattr(self, 'render_' + cell_type, unknown_cell)

    def convert(self):
        """Convert every cell of the first worksheet of ``self.nb``.

        The optional header comes first, then the rendered lines of each
        cell followed by an empty line, then the optional footer.

        Returns the whole document as one newline-joined string.
        """
        lines = []
        lines.extend(self.optional_header())
        for cell in self.nb.worksheets[0].cells:
            conv_fn = self.dispatch(cell.cell_type)
            lines.extend(conv_fn(cell))
            lines.append('')
        lines.extend(self.optional_footer())
        return '\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Write ``self.output`` to disk and return the name of the file.

        Parameters
        ----------
        infile : str, optional
            Path of the file to write (despite the parameter name).  By
            default the input path with its extension replaced by
            ``self.extension``.
        encoding : str, optional
            Encoding applied to ``self.output``; defaults to
            ``self.default_encoding``.
        """
        if infile is None:
            infile = os.path.splitext(self.infile)[0] + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The payload is encoded to bytes before writing, so the handle must
        # be binary: a text-mode handle rejects bytes on Python 3 and would
        # apply newline translation to the encoded data on Windows.
        with open(infile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return infile

    def optional_header(self):
        """Lines emitted before the first cell; none by default.

        Returns list."""
        return []

    def optional_footer(self):
        """Lines emitted after the last cell; none by default.

        Returns list."""
        return []

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, cell):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_display_data(self, cell):
        """convert display data from the output of a code cell

        Returns list.
        """
        raise NotImplementedError

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_plaintext(self, cell):
        """convert plain text

        Returns list."""
        raise NotImplementedError
141
141
142
142
class ConverterRST(Converter):
    """Converter that renders a notebook as reStructuredText.

    Methods decorated with ``DocInherit`` deliberately carry no docstring of
    their own (presumably the decorator supplies the base-class one --
    confirm against ``decorators.DocInherit``); they are documented with
    comments instead.
    """
    extension = 'rst'
    # Index used to name the next extracted figure file.
    figures_counter = 0
    # Underline character used for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        # Title followed by an underline of the same length.
        marker = self.heading_level[cell.level]
        return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines.extend(rst_directive('.. code:: python', cell.input))

        # Each output is rendered by the method matching its output_type.
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [cell.source]

    @DocInherit
    def render_plaintext(self, cell):
        return [cell.source]

    @DocInherit
    def render_pyout(self, output):
        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.extend(rst_directive('.. math::', output.latex))

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    @DocInherit
    def render_display_data(self, output):
        # Imported locally so this method does not rely on a module-level
        # import being present.
        import base64

        lines = []

        if 'png' in output:
            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary: write it through a binary handle so no
            # newline translation can corrupt the image, and decode with the
            # base64 module rather than the Python-2-only 'base64' codec.
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            # The image is referenced by bare file name; the file itself is
            # written into self.dirpath.
            lines.append('.. image:: %s' % infile)
            lines.append('')

        return lines

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines
212
212
class ConverterQuickHTML(Converter):
    """Converter that renders a notebook directly as very simple HTML.

    Methods decorated with ``DocInherit`` deliberately carry no docstring of
    their own (presumably the decorator supplies the base-class one --
    confirm against ``decorators.DocInherit``); they are documented with
    comments instead.

    NOTE(review): cell sources and outputs are inserted into the markup
    without HTML escaping.
    """
    extension = 'html'
    # Index used to name the next extracted figure file.
    figures_counter = 0

    def optional_header(self):
        """Opening lines of the HTML document, up to ``<body>``.

        Returns list."""
        # XXX: inject the IPython standard CSS into here
        s = """<html>
        <head>
        </head>

        <body>
        """
        return s.splitlines()

    def optional_footer(self):
        """Closing lines of the HTML document.

        Returns list."""
        s = """</body>
        </html>
        """
        return s.splitlines()

    @DocInherit
    def render_heading(self, cell):
        # The heading level maps directly onto <h1>..<h6>.
        marker = cell.level
        return ['<h{1}>\n {0}\n</h{1}>'.format(cell.source, marker)]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        # One table per cell: a row for the input, then one row per output.
        lines = ['<table>']
        lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>' % cell.prompt_number)
        lines.append("<br>\n".join(cell.input.splitlines()))
        lines.append('</tt></td></tr>')

        for output in cell.outputs:
            lines.append('<tr><td></td><td>')
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))
            lines.append('</td></tr>')

        lines.append('</table>')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return ["<pre>"+cell.source+"</pre>"]

    @DocInherit
    def render_plaintext(self, cell):
        return ["<pre>"+cell.source+"</pre>"]

    @DocInherit
    def render_pyout(self, output):
        lines = ['<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>' % output.prompt_number, '<td>']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.append("<pre>")
            # indent() yields a single string; append it whole.  Extending
            # the list with it would add one entry per character.
            lines.append(indent(output.latex))
            lines.append("</pre>")

        if 'text' in output:
            lines.append("<pre>")
            lines.append(indent(output.text))
            lines.append("</pre>")

        return lines

    @DocInherit
    def render_display_data(self, output):
        # Imported locally so this method does not rely on a module-level
        # import being present.
        import base64

        lines = []

        if 'png' in output:
            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary: write it through a binary handle so no
            # newline translation can corrupt the image, and decode with the
            # base64 module rather than the Python-2-only 'base64' codec.
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            lines.append('<img src="%s">' % infile)
            lines.append('')

        return lines

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.append(output.text)

        return lines
306
306
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : str
        Path of the rst file to convert.

    Returns
    -------
    str
        Path of the html file written: *infile* with its extension replaced
        by ``.html``.

    Raises
    ------
    IOError
        If rst2html writes anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste into
    # the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so a file name
    # containing spaces or shell metacharacters is passed through verbatim
    # instead of being re-parsed by the shell.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           infile]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Skip everything up to and including the opening <body> line.
    for line in walker:
        if line.startswith('<body>'):
            break

    # Copy the remaining lines until the closing </body> line.  If no <body>
    # line was found the walker is already exhausted and the file is empty.
    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
354
354
355
355
def main(infile, format='rst'):
    """Convert the notebook *infile* to the requested output *format*.

    Parameters
    ----------
    infile : str
        Path of the notebook file to convert.
    format : str
        ``'rst'`` (default), ``'html'`` (notebook -> rst -> html through
        rst2html) or ``'quick-html'`` (direct, very simple html).

    Raises
    ------
    ValueError
        If *format* is not one of the supported formats.  Such a request was
        previously ignored without producing any output.
    """
    if format == 'rst':
        ConverterRST(infile).render()
    elif format == 'html':
        # Currently, conversion to html is a 2 step process, nb->rst->html
        rstfname = ConverterRST(infile).render()
        rst2simplehtml(rstfname)
    elif format == 'quick-html':
        ConverterQuickHTML(infile).render()
    else:
        raise ValueError("unsupported output format %r; expected one of "
                         "'rst', 'html', 'quick-html'" % (format,))
369
369
370
370
if __name__ == '__main__':
    # Command-line entry point: parse the notebook path and output format,
    # then hand them to main().
    parser = argparse.ArgumentParser(description='nbconvert: Convert IPython notebooks to other formats')

    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    #Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    # The help text lists every format main() handles, including quick-html.
    parser.add_argument('-f', '--format', default='rst',
                        help='Output format. Supported formats: rst (default), html, quick-html.')
    args = parser.parse_args()
    # nargs=1 yields a one-element list, hence the [0].
    main(infile=args.infile[0], format=args.format)
General Comments 0
You need to be logged in to leave comments. Login now