##// END OF EJS Templates
closes #7
Paul Ivanov -
Show More
@@ -1,383 +1,383 b''
1 1 #!/usr/bin/env python
2 2 """A really simple notebook to rst/html exporter.
3 3
4 4 Usage
5 5
6 6 ./nb2html.py file.ipynb
7 7
8 8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 9 called nb_figure_NN.png.
10 10
11 11 """
12 12
13 13 import os
14 14 import subprocess
15 15 import sys
16 16 from IPython.external import argparse
17 17 from IPython.nbformat import current as nbformat
18 18 from IPython.utils.text import indent
19 19 from decorators import DocInherit
20 20
21 21 # Cell converters
22 22
def unknown_cell(cell):
    """Fallback converter for cells whose type has no dedicated renderer.

    Returns a list of output lines: an RST ``warning`` directive followed
    by the ``repr`` of the offending cell.
    """
    # rst_directive() hands back a fresh list, so appending to it is safe.
    warning_lines = rst_directive('.. warning:: Unknown cell')
    warning_lines.append(repr(cell))
    return warning_lines
29 29
30 30
def rst_directive(directive, text=''):
    """Build the output lines for an RST directive.

    The result always starts with the directive line and a blank line.
    When ``text`` is non-empty, the indented text and a trailing blank
    line are added after them.

    Returns list.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
36 36
37 37 # Converters for parts of a cell.
38 38
39 39
class ConversionException(Exception):
    """Error raised by the converters defined in this module."""
42 42
43 43
class Converter(object):
    """Base class for notebook converters.

    A subclass sets ``extension`` (the output file suffix, without the dot)
    and implements the ``render_<cell_type>`` methods. Each of those takes a
    cell (or a cell output) and returns a list of output lines.
    """
    # Encoding used by save() when the caller does not pass one.
    default_encoding = 'utf-8'

    def __init__(self, infile):
        """Remember the notebook path and the directory that contains it."""
        self.infile = infile
        self.dirpath = os.path.dirname(infile)

    @property
    def extension(self):
        """Output file extension; subclasses must override this."""
        raise ConversionException(
            "extension must be defined in Converter subclass")

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code
        """
        # XXX: unknown_cell here is RST specific - make it generic
        return getattr(self, 'render_' + cell_type, unknown_cell)

    def convert(self):
        """Render every cell of the first worksheet and join the lines.

        The optional header comes first, then each cell's lines followed by
        a blank line, then the optional footer.

        Returns the whole document as a single newline-joined string.
        """
        lines = []
        lines.extend(self.optional_header())
        for cell in self.nb.worksheets[0].cells:
            conv_fn = self.dispatch(cell.cell_type)
            lines.extend(conv_fn(cell))
            lines.append('')
        lines.extend(self.optional_footer())
        return '\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile; returns the output file name"
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Write self.output to disk and return the file name written.

        ``infile`` is the destination path. When omitted, it is the input
        notebook path with its extension replaced by ``self.extension``.
        ``encoding`` defaults to ``self.default_encoding``.
        """
        if infile is None:
            infile = os.path.splitext(self.infile)[0] + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The payload is already encoded bytes, so the file must be opened
        # in binary mode; a text-mode handle rejects bytes on Python 3.
        with open(infile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return infile

    def optional_header(self):
        """Lines emitted before the first cell (none by default).

        Returns list."""
        return []

    def optional_footer(self):
        """Lines emitted after the last cell (none by default).

        Returns list."""
        return []

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, cell):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_display_data(self, cell):
        """convert display data from the output of a code cell

        Returns list.
        """
        raise NotImplementedError

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_plaintext(self, cell):
        """convert plain text

        Returns list."""
        raise NotImplementedError
141 141
142 142
class ConverterRST(Converter):
    """Converter that renders a notebook as reStructuredText."""
    extension = 'rst'
    # Number of PNG figures written so far; used to name the next one.
    figures_counter = 0
    # Underline character for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    # NOTE: the overrides below are documented with comments, not docstrings,
    # so that @DocInherit (presumably copying the base-class docstring) is
    # not affected.

    @DocInherit
    def render_heading(self, cell):
        # Title line followed by an underline of the same length.
        marker = self.heading_level[cell.level]
        return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines.extend(rst_directive('.. code:: python', cell.input))

        # Each output is rendered by the method matching its output_type.
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [cell.source]

    @DocInherit
    def render_plaintext(self, cell):
        return [cell.source]

    @DocInherit
    def render_pyout(self, output):
        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.extend(rst_directive('.. math::', output.latex))

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    @DocInherit
    def render_display_data(self, output):
        lines = []

        if 'png' in output:
            # Local import keeps this block self-contained.
            import base64

            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary: open in 'wb' so no newline translation
            # corrupts it, and decode with base64.b64decode, which works on
            # both Python 2 and 3 (str.decode('base64') is Python 2 only).
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            # The directive uses the bare file name, not the full path.
            lines.append('.. image:: %s' % infile)
            lines.append('')

        return lines

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines
212 212
class ConverterQuickHTML(Converter):
    """Converter that renders a notebook directly as very simple HTML."""
    extension = 'html'
    # Number of PNG figures written so far; used to name the next one.
    figures_counter = 0

    # NOTE: the render_* overrides below are documented with comments, not
    # docstrings, so that @DocInherit (presumably copying the base-class
    # docstring) is not affected.
    # NOTE(review): cell text is inserted into the HTML without escaping, so
    # source containing '<' or '&' will not display verbatim - confirm
    # whether that is acceptable for this "quick" format.

    def optional_header(self):
        """Opening HTML boilerplate, as a list of lines."""
        # XXX: inject the IPython standard CSS into here
        s = """<html>
<head>
</head>

<body>
"""
        return s.splitlines()

    def optional_footer(self):
        """Closing HTML boilerplate, as a list of lines."""
        s = """</body>
</html>
"""
        return s.splitlines()

    @DocInherit
    def render_heading(self, cell):
        # The heading level is used directly as the <hN> tag number.
        marker = cell.level
        return ['<h{1}>\n  {0}\n</h{1}>'.format(cell.source, marker)]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        lines = ['<table>']
        lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>' % cell.prompt_number)
        lines.append("<br>\n".join(cell.input.splitlines()))
        lines.append('</tt></td></tr>')

        # One table row per output, rendered by the method matching its
        # output_type.
        for output in cell.outputs:
            lines.append('<tr><td></td><td>')
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))
            lines.append('</td></tr>')

        lines.append('</table>')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return ["<pre>"+cell.source+"</pre>"]

    @DocInherit
    def render_plaintext(self, cell):
        return ["<pre>"+cell.source+"</pre>"]

    @DocInherit
    def render_pyout(self, output):
        lines = ['<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>' % output.prompt_number, '<td>']

        # output is a dictionary like object with type as a key
        # indent() returns one string, so it is append()-ed. extend() would
        # add it character by character, putting every character on its own
        # line once the lines are joined.
        if 'latex' in output:
            lines.append("<pre>")
            lines.append(indent(output.latex))
            lines.append("</pre>")

        if 'text' in output:
            lines.append("<pre>")
            lines.append(indent(output.text))
            lines.append("</pre>")

        return lines

    @DocInherit
    def render_display_data(self, output):
        lines = []

        if 'png' in output:
            # Local import keeps this block self-contained.
            import base64

            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary: open in 'wb' so no newline translation
            # corrupts it, and decode with base64.b64decode, which works on
            # both Python 2 and 3 (str.decode('base64') is Python 2 only).
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            # The tag uses the bare file name, not the full path.
            lines.append('<img src="%s">' % infile)
            lines.append('')

        return lines

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.append(output.text)

        return lines
306 306
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    ``infile`` is the path of the rst file. The html is written next to it,
    with the extension replaced by ``.html``.

    Returns the name of the html file written.
    Raises IOError if rst2html writes anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste
    # into the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so a file name
    # containing spaces or shell metacharacters is passed through verbatim.
    # NOTE(review): without a shell, Windows will not resolve a plain
    # 'rst2html' script through file associations - confirm if that matters.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           infile]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    # The pipe yields bytes, so the markers are bytes literals and the output
    # file is opened in binary mode.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find the
    # closing body tag.
    # This may only work if there's a real title defined so we get a 'div
    # class' tag, I haven't really tried.
    for line in walker:
        if line.startswith(b'<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'wb') as f:
        for line in walker:
            if line.startswith(b'</body>'):
                break
            f.write(line)
            f.write(b'\n')

    return newfname
354 354
355 355
def main(infile, format='rst'):
    """Convert a notebook to the requested format in one step.

    ``infile`` is the path of the notebook file.
    ``format`` is one of:

    * ``'rst'``        - write reStructuredText (default)
    * ``'html'``       - write rst, then convert that file with rst2html
    * ``'quick-html'`` - write html directly, without going through rst

    Raises ValueError for any other format, instead of silently doing
    nothing.
    """
    if format == 'rst':
        ConverterRST(infile).render()
    elif format == 'html':
        # Currently, conversion to html is a 2 step process, nb->rst->html
        rstfname = ConverterRST(infile).render()
        rst2simplehtml(rstfname)
    elif format == 'quick-html':
        ConverterQuickHTML(infile).render()
    else:
        raise ValueError("unsupported output format: %r "
                         "(expected 'rst', 'html' or 'quick-html')" % (format,))
369 369
370 370
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and run the conversion.
    parser = argparse.ArgumentParser(description='nbconvert: Convert IPython notebooks to other formats')

    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    # Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    # 'choices' makes argparse reject an unsupported format with a usage
    # error; the help text now lists every format main() handles.
    parser.add_argument('-f', '--format', default='rst',
                        choices=['rst', 'html', 'quick-html'],
                        help='Output format. Supported formats: rst (default), html, quick-html.')
    args = parser.parse_args()
    # nargs=1 yields a one-element list, hence the [0].
    main(infile=args.infile[0], format=args.format)
General Comments 0
You need to be logged in to leave comments. Login now