##// END OF EJS Templates
new quick-html converter which has no dependencies
Paul Ivanov -
Show More
@@ -1,278 +1,383 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """A really simple notebook to rst/html exporter.
2 """A really simple notebook to rst/html exporter.
3
3
4 Usage
4 Usage
5
5
6 ./nb2html.py file.ipynb
6 ./nb2html.py file.ipynb
7
7
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 called nb_figure_NN.png.
9 called nb_figure_NN.png.
10
10
11 """
11 """
12
12
import base64
import os
import subprocess
import sys
from xml.sax.saxutils import escape

from IPython.external import argparse
from IPython.nbformat import current as nbformat
from IPython.utils.text import indent

from decorators import DocInherit
20
20
21 # Cell converters
21 # Cell converters
22
22
def unknown_cell(cell):
    """Default converter for cells of unknown type.

    Returns a list of lines: an RST ``warning`` directive followed by the
    ``repr`` of the cell, so unsupported content remains visible in the
    generated document instead of being dropped.
    """
    return rst_directive('.. warning:: Unknown cell') + [repr(cell)]
29
29
30
30
def rst_directive(directive, text=''):
    """Build the lines of an RST directive.

    directive : the directive line itself, e.g. ``'.. code:: python'``.
    text : optional directive body.  When non-empty it is passed through
        ``indent`` and followed by a blank line.

    Returns a new list of strings: the directive, a blank line and, if
    ``text`` was given, the indented body plus a trailing blank line.
    """
    out = [directive, '']
    if text:
        out.extend([indent(text), ''])
    return out
36
36
37 # Converters for parts of a cell.
37 # Converters for parts of a cell.
38
38
39
39
class ConversionException(Exception):
    """Error raised by converters, e.g. when a Converter subclass does not
    define its output ``extension``."""
42
42
43
43
class Converter(object):
    """Base class for notebook converters.

    A subclass sets ``extension`` and implements the ``render_<type>``
    methods for the cell and output types it supports.  ``render()`` runs
    the whole pipeline: read the notebook, convert it, save the result.
    """
    default_encoding = 'utf-8'

    def __init__(self, infile):
        self.infile = infile
        # Generated side files (figures) are written next to the notebook.
        self.dirpath = os.path.dirname(infile)

    @property
    def extension(self):
        # Subclasses override this with a plain class attribute.
        raise ConversionException(
            "extension must be defined in Converter\nsubclass")

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code
        """
        # XXX: unknown_cell here is RST specific - make it generic
        return getattr(self, 'render_' + cell_type, unknown_cell)

    def convert(self):
        """Render every cell of the first worksheet of ``self.nb``.

        Returns the whole document as one newline-joined string, wrapped
        in the optional header and footer.
        """
        lines = []
        lines.extend(self.optional_header())
        for cell in self.nb.worksheets[0].cells:
            conv_fn = self.dispatch(cell.cell_type)
            lines.extend(conv_fn(cell))
            lines.append('')
        lines.extend(self.optional_footer())
        return '\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Encode ``self.output`` and write it to disk.

        infile : destination path.  Defaults to ``self.infile`` with its
            extension replaced by ``self.extension``.  (The parameter name
            is historical; it is the *output* file.)
        encoding : output encoding, defaults to ``self.default_encoding``.

        Returns the path that was written.
        """
        if infile is None:
            infile = os.path.splitext(self.infile)[0] + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # encode() produces bytes, so the file has to be opened in binary
        # mode; a text-mode handle rejects bytes on Python 3.
        with open(infile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return infile

    def optional_header(self):
        """Lines emitted before the first cell.

        Returns list (empty by default, so ``convert`` can always extend
        with the result)."""
        return []

    def optional_footer(self):
        """Lines emitted after the last cell.

        Returns list (empty by default)."""
        return []

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, cell):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_display_data(self, cell):
        """convert display data from the output of a code cell

        Returns list.
        """
        raise NotImplementedError

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_plaintext(self, cell):
        """convert plain text

        Returns list."""
        raise NotImplementedError
132
141
133
142
134 class ConverterRST(Converter):
class ConverterRST(Converter):
    """Converter that renders a notebook as reStructuredText.

    PNG outputs are written as ``nb_figure_<N>.png`` files in the
    notebook's directory and referenced through ``.. image::`` directives.
    """
    extension = 'rst'
    # Index used to name the next figure file; incremented per PNG output.
    figures_counter = 0
    # Underline character used for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        marker = self.heading_level[cell.level]
        return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines.extend(rst_directive('.. code:: python', cell.input))

        # Each output is rendered by the method matching its output_type.
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [cell.source]

    @DocInherit
    def render_plaintext(self, cell):
        return [cell.source]

    @DocInherit
    def render_pyout(self, output):
        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.extend(rst_directive('.. math::', output.latex))

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    @DocInherit
    def render_display_data(self, output):
        lines = []

        if 'png' in output:
            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary: open in 'wb' so no newline translation can
            # corrupt it, and decode with the base64 module rather than the
            # Python-2-only 'base64' str codec.
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            lines.append('.. image:: %s' % infile)
            lines.append('')

        return lines

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines
203
212
class ConverterQuickHTML(Converter):
    """Converter that writes a minimal HTML page directly, without going
    through rst2html.

    All notebook text is HTML-escaped and placed in ``<pre>`` elements so
    that characters such as ``<`` and significant whitespace (code
    indentation, line breaks) survive in the rendered page.
    """
    extension = 'html'
    # Index used to name the next figure file; incremented per PNG output.
    figures_counter = 0

    def optional_header(self):
        # XXX: inject the IPython standard CSS into here
        return ['<html>', '<head>', '</head>', '', '<body>']

    def optional_footer(self):
        return ['</body>', '</html>']

    @DocInherit
    def render_heading(self, cell):
        marker = cell.level
        return ['<h{1}>\n {0}\n</h{1}>'.format(escape(cell.source), marker)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        # One table per code cell: prompt on the left, content on the right.
        lines = ['<table>']
        lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td>'
                     % cell.prompt_number)
        lines.append('<pre>' + escape(cell.input) + '</pre>')
        lines.append('</td></tr>')

        for output in cell.outputs:
            lines.append('<tr><td></td><td>')
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))
            lines.append('</td></tr>')

        lines.append('</table>')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return ["<pre>" + escape(cell.source) + "</pre>"]

    @DocInherit
    def render_plaintext(self, cell):
        return ["<pre>" + escape(cell.source) + "</pre>"]

    @DocInherit
    def render_pyout(self, output):
        # render_code has already opened the table cell this goes into, so
        # only inline content is emitted here; opening further <tr>/<td>
        # elements at this point would produce malformed HTML.
        lines = ['<tt>Out[<b>%s</b>]:</tt>' % output.prompt_number]

        # output is a dictionary like object with type as a key
        # indent() returns a single string, so it must be appended whole:
        # list.extend() on a string would add one item per character.
        if 'latex' in output:
            lines.append("<pre>" + escape(indent(output.latex)) + "</pre>")

        if 'text' in output:
            lines.append("<pre>" + escape(indent(output.text)) + "</pre>")

        return lines

    @DocInherit
    def render_display_data(self, output):
        lines = []

        if 'png' in output:
            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary: open in 'wb' and decode with the base64
            # module rather than the Python-2-only 'base64' str codec.
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            lines.append('<img src="%s">' % infile)
            lines.append('')

        return lines

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.append("<pre>" + escape(output.text) + "</pre>")

        return lines
204
306
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    infile : path of the rst file to convert.

    Returns the name of the html file written (``infile`` with its
    extension replaced by ``.html``).  Raises IOError carrying rst2html's
    stderr output if the tool printed anything there.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste
    # into the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so a filename
    # containing spaces or shell metacharacters is passed through verbatim.
    cmd = ['rst2html', '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments', infile]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    # The pipe delivers bytes, so the markers are byte strings and the
    # result is written in binary mode, without guessing an encoding.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find end /div.
    # This may only work if there's a real title defined so we get a 'div class'
    # tag, I haven't really tried.
    for line in walker:
        if line.startswith(b'<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'wb') as f:
        for line in walker:
            if line.startswith(b'</body>'):
                break
            f.write(line)
            f.write(b'\n')

    return newfname
252
354
253
355
def main(infile, format='rst'):
    """Convert a notebook to the requested output format.

    infile : path of the notebook file.
    format : one of
        'rst'        -- reStructuredText (default),
        'html'       -- html via rst (nb -> rst -> rst2html),
        'quick-html' -- html written directly, no external tools.

    Raises ValueError for any other format, instead of silently producing
    no output.
    """
    if format not in ('rst', 'html', 'quick-html'):
        raise ValueError('Unsupported output format: %r' % (format,))

    if format == 'rst':
        converter = ConverterRST(infile)
        converter.render()
    elif format == 'html':
        #Currently, conversion to html is a 2 step process, nb->rst->html
        converter = ConverterRST(infile)
        rstfname = converter.render()
        rst2simplehtml(rstfname)
    else:
        # 'quick-html'
        converter = ConverterQuickHTML(infile)
        converter.render()
264
369
265
370
266 if __name__ == '__main__':
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='nbconvert: Convert IPython notebooks to other formats')

    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    #Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    # 'choices' makes argparse reject a mistyped format with a usage error
    # rather than letting the script exit without writing anything.
    parser.add_argument('-f', '--format', default='rst',
                        choices=('rst', 'html', 'quick-html'),
                        help='Output format. Supported formats: rst (default), html, quick-html.')
    args = parser.parse_args()
    main(infile=args.infile[0], format=args.format)
General Comments 0
You need to be logged in to leave comments. Login now