##// END OF EJS Templates
Improve SVG support, other small fixes.
Fernando Perez -
Show More
@@ -1,557 +1,595 b''
1 1 #!/usr/bin/env python
2 2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
3 3
4 4 Example:
5 5 ./nbconvert.py --format html file.ipynb
6 6
7 7 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 8 called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
9 9 use '--format quick-html' which will do ipynb -> html, but won't look as
10 10 pretty.
11 11 """
12 12 from __future__ import print_function
13 13
14 import codecs
14 15 import os
15 16 import pprint
16 17 import re
17 18 import subprocess
18 19 import sys
19 20
20 21 from IPython.external import argparse
21 22 from IPython.nbformat import current as nbformat
22 23 from IPython.utils.text import indent
23 24 from decorators import DocInherit
24 25
def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    Every SGR ("select graphic rendition") sequence is removed: ESC, '[',
    zero or more ';'-separated numeric parameters, and a final 'm'.  This
    covers the plain reset (``ESC[0m`` / ``ESC[m``) as well as sequences
    with any number of parameters or digits (``ESC[01;34m``,
    ``ESC[1;32;40m``).

    Parameters
    ----------
    src : string

    Returns
    -------
    string
    """
    return re.sub(r'\x1b\[[\d;]*m', '', src)
37 38
38 39 # Pandoc-dependent code
def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed.

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc, decoded from UTF-8.
    """
    # pandoc reads and writes UTF-8; encode text input explicitly so that
    # non-ASCII markdown does not fail on an implicit ASCII conversion.
    if not isinstance(src, bytes):
        src = src.encode('utf-8')

    # stderr must be piped too, otherwise communicate() always returns None
    # for it and the error report below can never trigger.
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate(src)
    if err:
        print(err.decode('utf-8', 'replace'), file=sys.stderr)
    return out.decode('utf-8')
63 64
64 65 # Cell converters
65 66
66 67
def rst_directive(directive, text=''):
    """Return the lines of a reST directive with an optional indented body.

    Parameters
    ----------
    directive : string
      The directive line itself, e.g. ``'.. code:: python'``.
    text : string, optional
      Body of the directive.  When non-empty it is indented with `indent`
      and followed by a blank line.

    Returns
    -------
    list of strings
      ``[directive, '']`` followed by the indented body and another blank
      line when `text` is given.
    """
    out = [directive, '']
    if text:
        out.extend([indent(text), ''])
    return out
72 73
73 74 # Converters for parts of a cell.
74 75
75 76
class ConversionException(Exception):
    """Error type reserved for failures specific to notebook conversion."""
    pass
78 79
79 80
class Converter(object):
    """Base class for notebook converters.

    Subclasses set `extension` and implement the ``render_*`` methods (and
    `_img_lines`) for their output format.  `render` drives the whole
    process: read the notebook, convert every cell, save the result.
    """
    default_encoding = 'utf-8'
    # File extension (without the dot) of the generated document.
    extension = str()
    # Number of figure files written so far; used to number them.
    figures_counter = 0
    # Path of the input notebook, its directory, its path without the
    # extension, and the directory that receives auxiliary (figure) files.
    infile = str()
    infile_dir = str()
    infile_root = str()
    files_dir = str()

    def __init__(self, infile):
        """Record the input path and create the ``<root>_files`` directory.

        Parameters
        ----------
        infile : string
          Path of the notebook file to convert.
        """
        self.infile = infile
        self.infile_dir = os.path.dirname(infile)
        self.infile_root = os.path.splitext(infile)[0]
        self.files_dir = self.infile_root + '_files'
        if not os.path.isdir(self.files_dir):
            os.mkdir(self.files_dir)

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to `render_unknown` when no ``render_<cell_type>``
        method exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def convert(self):
        """Convert every cell of ``self.nb`` and return the output string.

        The rendered lines of each cell are followed by an empty line; the
        optional header and footer bracket the whole document.
        """
        lines = []
        lines.extend(self.optional_header())
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                lines.extend(conv_fn(cell))
                lines.append('')
        lines.extend(self.optional_footer())
        return '\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Write ``self.output`` to disk and return the file name used.

        Parameters
        ----------
        infile : string, optional
          Name of the file to write.  Defaults to the input notebook path
          with its extension replaced by ``self.extension``.
        encoding : string, optional
          Encoding of the written file, `default_encoding` if omitted.
        """
        if infile is None:
            infile = os.path.splitext(self.infile)[0] + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The data is encoded explicitly, so the file is opened in binary
        # mode: the bytes must reach the disk untouched.
        with open(infile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return infile

    def optional_header(self):
        """Return the list of lines placed before the converted cells."""
        return []

    def optional_footer(self):
        """Return the list of lines placed after the converted cells."""
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        Parameters
        ----------
        data : string
          Figure content: base64-encoded for 'png', 'jpg' and 'pdf', plain
          text (XML) for anything else such as 'svg'.
        fmt : string
          Figure format, used as the file extension.

        Returns
        -------
        string
          Path of the written file, located inside ``self.files_dir``.
        """
        import base64

        # Only the base name of the notebook goes into the figure name;
        # `files_dir` already carries the directory part, and joining it
        # with a name that contains directories would point at a
        # non-existent location (or, for absolute paths, outside of
        # `files_dir` altogether).
        figname = '%s_fig_%02i.%s' % (os.path.basename(self.infile_root),
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = base64.b64decode(data)
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Every image format present in `output` is written to its own
        figure file and referenced from the returned lines.

        Returns list.
        """
        lines = []

        for fmt in ['png', 'svg', 'jpg', 'pdf']:
            if fmt in output:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))

        return lines

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_plaintext(self, cell):
        """convert plain text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Returns list."""
        raise NotImplementedError
200 238
201 239
class ConverterRST(Converter):
    """Convert a notebook to reStructuredText.

    Method documentation is inherited from `Converter` through the
    `DocInherit` decorator, so the overrides only carry comments.
    """
    extension = 'rst'
    # Underline character used for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        # A reST title is the text underlined by a marker line of the
        # same length.
        marker = self.heading_level[cell.level]
        return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines.extend(rst_directive('.. code:: python', cell.input))

        # Each output is rendered by the method matching its output type.
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [cell.source]

    @DocInherit
    def render_plaintext(self, cell):
        return [cell.source]

    @DocInherit
    def render_pyout(self, output):
        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.extend(rst_directive('.. math::', output.latex))

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    @DocInherit
    def _img_lines(self, img_file):
        # The path to reference is the `img_file` argument.
        return ['.. image:: %s' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    @DocInherit
    def render_unknown(self, cell):
        return rst_directive('.. warning:: Unknown cell') + [repr(cell)]
269 300
class ConverterQuickHTML(Converter):
    """Convert a notebook straight to a minimal, unstyled HTML page.

    Method documentation is inherited from `Converter` through the
    `DocInherit` decorator, so the overrides only carry comments.
    """
    extension = 'html'

    def optional_header(self):
        """Return the lines that open the HTML document."""
        # XXX: inject the IPython standard CSS into here
        return ['<html>', '<head>', '</head>', '', '<body>']

    def optional_footer(self):
        """Return the lines that close the HTML document."""
        return ['</body>', '</html>']

    @DocInherit
    def render_heading(self, cell):
        marker = cell.level
        return ['<h{1}>\n {0}\n</h{1}>'.format(cell.source, marker)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        # One table per code cell: the first row holds the input, every
        # output gets a row of its own.
        lines = ['<table>']
        lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>'
                     % cell.prompt_number)
        lines.append("<br>\n".join(cell.input.splitlines()))
        lines.append('</tt></td></tr>')

        for output in cell.outputs:
            lines.append('<tr><td></td><td>')
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))
            lines.append('</td></tr>')

        lines.append('</table>')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return ["<pre>" + cell.source + "</pre>"]

    @DocInherit
    def render_plaintext(self, cell):
        return ["<pre>" + cell.source + "</pre>"]

    @DocInherit
    def render_pyout(self, output):
        lines = ['<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>'
                 % output.prompt_number, '<td>']

        # output is a dictionary like object with type as a key
        #
        # The indented text is a single string and must be appended:
        # extending the list with it would add one entry per character,
        # which convert() then joins with newlines.
        if 'latex' in output:
            lines.append("<pre>")
            lines.append(indent(output.latex))
            lines.append("</pre>")

        if 'text' in output:
            lines.append("<pre>")
            lines.append(indent(output.text))
            lines.append("</pre>")

        return lines

    @DocInherit
    def _img_lines(self, img_file):
        return ['<img src="%s">' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.append(output.text)

        return lines
362 381
363 382
class ConverterLaTeX(Converter):
    """Converts a notebook to a .tex file suitable for pdflatex.

    Note: this converter *needs*:

    - `pandoc`: for all conversion of markdown cells.  If your notebook only
      has Raw cells, pandoc will not be needed.

    - `inkscape`: if your notebook has SVG figures.  These need to be
      converted to PDF before inclusion in the TeX file, as LaTeX doesn't
      understand SVG natively.

    You will in general obtain much better final PDF results if you configure
    the matplotlib backend to create SVG output with

    %config InlineBackend.figure_format = 'svg'

    (or set the equivalent flag at startup or in your configuration profile).
    """
    extension = 'tex'
    # Sectioning command used for each heading level.
    heading_marker = {1: r'\section',
                      2: r'\subsection',
                      3: r'\subsubsection',
                      4: r'\paragraph',
                      5: r'\subparagraph',
                      6: r'\subparagraph'}

    def env(self, environment, lines):
        """Return list of environment lines for input lines

        Parameters
        ----------
        environment : string
          Name of the environment to bracket with begin/end.

        lines : string or list of strings
          Content of the environment.  A single string is inserted as one
          entry, a list contributes one entry per element.

        Returns
        -------
        list of strings
          The ``\\begin{...}`` line, the content, and the ``\\end{...}``
          line.
        """
        try:
            string_types = basestring
        except NameError:  # no `basestring` on Python 3
            string_types = str

        out = [r'\begin{%s}' % environment]
        if isinstance(lines, string_types):
            out.append(lines)
        else:  # list
            out.extend(lines)
        out.append(r'\end{%s}' % environment)
        return out

    @DocInherit
    def render_heading(self, cell):
        marker = self.heading_marker[cell.level]
        return ['%s{%s}\n\n' % (marker, cell.source)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        # Cell codes first carry input code, we use lstlisting for that
        lines = [r'\begin{codecell}']

        lines.extend(self.env('codeinput',
                              self.env('lstlisting', cell.input)))

        outlines = []
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            outlines.extend(conv_fn(output))

        # And then output of many possible types; use a frame for all of it.
        if outlines:
            lines.extend(self.env('codeoutput', outlines))

        lines.append(r'\end{codecell}')

        return lines

    @DocInherit
    def _img_lines(self, img_file):
        return self.env('center',
                        [r'\includegraphics[width=3in]{%s}' % img_file,
                         r'\par'])

    def _svg_lines(self, img_file):
        """Convert an SVG figure to PDF with inkscape and include the PDF.

        The PDF is written next to the SVG file, with the same base name.
        Raises `subprocess.CalledProcessError` if inkscape exits with a
        non-zero status.
        """
        base_file = os.path.splitext(img_file)[0]
        pdf_file = base_file + '.pdf'
        subprocess.check_call(['inkscape', '--export-pdf=%s' % pdf_file,
                               img_file])
        return self._img_lines(pdf_file)

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.extend(self.env('verbatim', output.text.strip()))

        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [markdown2latex(cell['source'])]

    @DocInherit
    def render_pyout(self, output):
        lines = []

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            latex = output.latex
            # A string is one entry; extending the list with it would add
            # one entry per character.  A list of lines is still accepted.
            if isinstance(latex, (list, tuple)):
                lines.extend(latex)
            else:
                lines.append(latex)

        if 'text' in output:
            lines.extend(self.env('verbatim', output.text))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        return self.env('traceback',
                        self.env('verbatim',
                                 remove_ansi('\n'.join(output.traceback))))

    @DocInherit
    def render_unknown(self, cell):
        return self.env('verbatim', pprint.pformat(cell))
467 505
468 506
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : string
      Path of the reST file to convert.

    Returns
    -------
    string
      Name of the written file: `infile` with its extension replaced by
      ``.html``.

    Raises
    ------
    IOError
      If rst2html writes anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste
    # into the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is passed as an argument list, without a shell, so that
    # file names containing spaces or shell metacharacters are handed to
    # rst2html verbatim.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           infile]
    # universal_newlines makes the pipes deliver text, so the string
    # comparisons below also work where pipes yield bytes by default.
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find the
    # closing body tag.  This may only work if there's a real title defined
    # so we get a 'div class' tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
516 554
known_formats = "rst (default), html, quick-html, latex"


def main(infile, format='rst'):
    """Convert the notebook `infile` to the requested output format.

    Parameters
    ----------
    infile : string
      Path of the notebook file to convert.
    format : string, optional
      One of 'rst', 'html', 'quick-html' or 'latex'.

    Raises
    ------
    SystemExit
      If `format` is not one of the known formats.
    """
    # XXX: this is just quick and dirty for now.  When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all else, as well as in the help
    if format == 'rst':
        ConverterRST(infile).render()
    elif format == 'html':
        # Currently, conversion to html is a 2 step process, nb->rst->html
        rstfname = ConverterRST(infile).render()
        rst2simplehtml(rstfname)
    elif format == 'quick-html':
        ConverterQuickHTML(infile).render()
    elif format == 'latex':
        ConverterLaTeX(infile).render()
    else:
        raise SystemExit("Unknown format '%s', " % format +
                         "known formats are: " + known_formats)
541 579
542 580
543 581
if __name__ == '__main__':
    # Command line entry point: parse the arguments and run the conversion.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams, e.g. with
    # add_argument('infile', nargs='?', type=argparse.FileType('r'),
    #              default=sys.stdin)

    # Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    parser.add_argument('-f', '--format', default='rst',
                        help='Output format. Supported formats: \n' +
                        known_formats)
    args = parser.parse_args()
    # nargs=1 stores the positional argument as a one-element list.
    main(infile=args.infile[0], format=args.format)
General Comments 0
You need to be logged in to leave comments. Login now