##// END OF EJS Templates
The last commit did not include the nbconvert.py change that avoids rereading the file -- adding it now.
Jonathan Taylor -
Show More
@@ -1,863 +1,864 b''
1 1 #!/usr/bin/env python
2 2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
3 3
4 4 Example:
5 5 ./nbconvert.py --format html file.ipynb
6 6
7 7 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 8 called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
9 9 use '--format quick-html' which will do ipynb -> html, but won't look as
10 10 pretty.
11 11 """
12 12 #-----------------------------------------------------------------------------
13 13 # Imports
14 14 #-----------------------------------------------------------------------------
15 15 from __future__ import print_function
16 16
17 17 # Stdlib
18 18 import codecs
19 19 import logging
20 20 import os
21 21 import pprint
22 22 import re
23 23 import subprocess
24 24 import sys
25 25 import json
26 26 import copy
27 27 from shutil import rmtree
28 28
# Command used to run Inkscape (needed to convert SVG figures to PDF).
# On OS X the application-bundle binary is used, or None when the bundle is
# not installed; everywhere else the bare command name is used.
if sys.platform == 'darwin':
    inkscape = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
    inkscape = inkscape if os.path.exists(inkscape) else None
else:
    inkscape = 'inkscape'
34 34
35 35 # From IPython
36 36 from IPython.external import argparse
37 37 from IPython.nbformat import current as nbformat
38 38 from IPython.utils.text import indent
39 39 from decorators import DocInherit
40 40 from IPython.nbformat.v3.nbjson import BytesEncoder
41 41 from IPython.utils import py3compat
42 42
43 43 #-----------------------------------------------------------------------------
44 44 # Utility functions
45 45 #-----------------------------------------------------------------------------
46 46
def DocInherit(f):
    """No-op decorator: hand back `f` unchanged.

    This module-level definition replaces the `DocInherit` name imported
    from `decorators` above, so decorated methods are left as they are.
    """
    return f
49 49
def remove_fake_files_url(cell):
    """Strip every '/files/' pseudo-path prefix from `cell.source`.

    The cell is modified in place; nothing is returned.
    """
    cell.source = cell.source.replace('/files/', '')
55 55
56 56
# ESC [ <zero or more digits/semicolons> m  -- an ANSI color/attribute code.
# The previous pattern only matched 'ESC[0m' and 'ESC[d;ddm', so codes such
# as 'ESC[31m', 'ESC[m' or 'ESC[0;31;1m' were left in the output.
_ANSI_COLOR_RE = re.compile(r'\033\[[0-9;]*m')


def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    Parameters
    ----------
    src : string

    Returns
    -------
    string
        `src` with every ``ESC[...m`` sequence removed.
    """
    return _ANSI_COLOR_RE.sub('', src)
69 69
70 70
71 71 # Pandoc-dependent code
def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed.

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc.
    """
    # stderr must be piped as well, otherwise communicate() always returns
    # None for it and the reporting branch below can never run.
    p = subprocess.Popen('pandoc -f markdown -t latex'.split(),
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate(src.encode('utf-8'))
    if err:
        print(err, file=sys.stderr)
    return out.decode('utf-8')
96 96
97 97
def markdown2rst(src):
    """Convert a markdown string to ReST via pandoc.

    This function will raise an error if pandoc is not installed.

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc.
    """
    # stderr must be piped as well, otherwise communicate() always returns
    # None for it and the reporting branch below can never run.
    p = subprocess.Popen('pandoc -f markdown -t rst'.split(),
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate(src.encode('utf-8'))
    if err:
        print(err, file=sys.stderr)
    return out.decode('utf-8')
122 122
123 123
def rst_directive(directive, text=''):
    """Return the lines of a ReST directive.

    The result is the directive line followed by a blank line; when `text`
    is non-empty, the indented text and another blank line are appended.
    """
    block = [directive, '']
    if not text:
        return block
    return block + [indent(text), '']
129 129
130 130 #-----------------------------------------------------------------------------
131 131 # Class declarations
132 132 #-----------------------------------------------------------------------------
133 133
class ConversionException(Exception):
    """Exception type reserved for notebook conversion errors."""
136 136
137 137
class Converter(object):
    """Base class for notebook converters.

    Subclasses set ``extension`` and implement the ``render_*``,
    ``_img_lines`` and ``_unknown_lines`` hooks; every hook returns a list
    of output lines.  ``render()`` drives the read -> convert -> save cycle.
    """
    default_encoding = 'utf-8'
    extension = ''           # output file extension, set by subclasses
    figures_counter = 0      # running index used to name extracted figures
    infile = ''
    infile_dir = ''
    infile_root = ''
    files_dir = ''
    with_preamble = True
    user_preamble = None
    output = ''
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the input path and create the '<root>_files' directory."""
        self.infile = infile
        self.infile_dir = os.path.dirname(infile)
        infile_root = os.path.splitext(infile)[0]
        files_dir = infile_root + '_files'
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = infile_root

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``render_unknown`` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def convert(self, cell_separator='\n'):
        """Convert every cell of ``self.nb`` and return the joined text.

        The result is the optional header, the converted cells joined by
        `cell_separator`, and the optional footer, one item per line.
        """
        lines = []
        lines.extend(self.optional_header())
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        lines.extend(cell_lines)
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        # A notebook that was already loaded (self.nb set) is not re-read.
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to disk and return the absolute path.

        Parameters
        ----------
        outfile : str, optional
          Destination; defaults to ``self.outbase + '.' + self.extension``.
        encoding : str, optional
          Defaults to ``self.default_encoding``.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The payload is already encoded, so the file is opened in binary
        # mode to write the bytes untouched.
        with open(outfile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return os.path.abspath(outfile)

    def optional_header(self):
        """Return the list of lines placed before the converted cells."""
        return []

    def optional_footer(self):
        """Return the list of lines placed after the converted cells."""
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        Returns the path of the new file inside ``self.files_dir``.
        """
        import base64

        # Only the base name goes into the figure name: infile_root may
        # carry a directory (or be absolute), and joining that onto
        # files_dir would point outside the files directory.
        figname = '%s_fig_%02i.%s' % (os.path.basename(self.infile_root),
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = base64.b64decode(data)
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Returns list.
        """
        lines = []

        for fmt in ['png', 'svg', 'jpg', 'pdf']:
            if fmt in output:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))

        return lines

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Logs a warning and delegates to ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell:\n%s' % data)
        return self._unknown_lines(data)

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError
317 318
318 319
class ConverterRST(Converter):
    """Converter producing reStructuredText ('.rst') output."""
    extension = 'rst'
    # Underline character for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        """Return the heading text underlined with its level marker."""
        underline_char = self.heading_level[cell.level]
        title = cell.source
        return ['{0}\n{1}\n'.format(title, underline_char * len(title))]

    @DocInherit
    def render_code(self, cell):
        """Return the input prompt, a code directive and all outputs."""
        if not cell.input:
            return []

        result = ['In[%s]:' % cell.prompt_number, '']
        result += rst_directive('.. code:: python', cell.input)
        for out in cell.outputs:
            renderer = self.dispatch(out.output_type)
            result += renderer(out)
        return result

    @DocInherit
    def render_markdown(self, cell):
        """Return the cell source converted from markdown by pandoc."""
        return [markdown2rst(cell.source)]

    @DocInherit
    def render_raw(self, cell):
        """Return the raw source, as a literal block if raw_as_verbatim."""
        if not self.raw_as_verbatim:
            return [cell.source]
        return ['::', '', indent(cell.source), '']

    @DocInherit
    def render_pyout(self, output):
        """Return the output prompt plus math and/or literal directives."""
        result = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        for key, directive in (('latex', '.. math::'),
                               ('text', '.. parsed-literal::')):
            if key in output:
                result += rst_directive(directive, output[key])

        return result

    @DocInherit
    def render_pyerr(self, output):
        """Return the traceback, ANSI codes removed, as a literal block."""
        # Note: a traceback is a *list* of frames.
        plain = remove_ansi('\n'.join(output.traceback))
        return ['::', '', indent(plain), '']

    @DocInherit
    def _img_lines(self, img_file):
        """Return an image directive pointing at `img_file`."""
        return ['.. image:: %s' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        """Return the stream text as a parsed-literal directive."""
        if 'text' not in output:
            return []
        return list(rst_directive('.. parsed-literal::', output.text))

    @DocInherit
    def _unknown_lines(self, data):
        """Return a warning directive followed by the raw cell dump."""
        return rst_directive('.. warning:: Unknown cell') + [data]
388 389
389 390
class ConverterQuickHTML(Converter):
    """Converter writing a minimal HTML page directly from the notebook."""
    extension = 'html'

    def in_tag(self, tag, src):
        """Return a list of elements bracketed by the given tag"""
        opening = '<%s>' % tag
        closing = '</%s>' % tag
        return [opening, src, closing]

    def optional_header(self):
        """Return the lines that open the HTML document."""
        # XXX: inject the IPython standard CSS into here
        return ['<html>', '<head>', '</head>', '', '<body>']

    def optional_footer(self):
        """Return the lines that close the HTML document."""
        return ['</body>', '</html>']

    @DocInherit
    def render_heading(self, cell):
        """Return the heading wrapped in an <hN> tag for its level."""
        level = cell.level
        return ['<h{1}>\n  {0}\n</h{1}>'.format(cell.source, level)]

    @DocInherit
    def render_code(self, cell):
        """Return a table holding the input and one row per output."""
        if not cell.input:
            return []

        rows = [
            '<table>',
            '<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>'
            % cell.prompt_number,
            "<br>\n".join(cell.input.splitlines()),
            '</tt></td></tr>',
        ]

        for out in cell.outputs:
            rows.append('<tr><td></td><td>')
            rows += self.dispatch(out.output_type)(out)
            rows.append('</td></tr>')

        rows.append('</table>')
        return rows

    @DocInherit
    def render_markdown(self, cell):
        """Return the markdown source, unconverted, inside <pre>."""
        return self.in_tag('pre', cell.source)

    @DocInherit
    def render_raw(self, cell):
        """Return the raw source, inside <pre> if raw_as_verbatim."""
        if not self.raw_as_verbatim:
            return [cell.source]
        return self.in_tag('pre', cell.source)

    @DocInherit
    def render_pyout(self, output):
        """Return the output prompt row plus text/latex in <pre> tags."""
        prompt = ('<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>' %
                  output.prompt_number)
        rows = [prompt, '<td>']

        # output is a dictionary like object with type as a key
        for key in ('text', 'latex'):
            if key in output:
                rows += self.in_tag('pre', indent(output[key]))

        return rows

    @DocInherit
    def render_pyerr(self, output):
        """Return the traceback, ANSI codes removed, inside <pre>."""
        # Note: a traceback is a *list* of frames.
        plain = remove_ansi('\n'.join(output.traceback))
        return self.in_tag('pre', plain)

    @DocInherit
    def _img_lines(self, img_file):
        """Return an <img> tag pointing at `img_file`."""
        return ['<img src="%s">' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        """Return the stream text as a single list element, if present."""
        return [output.text] if 'text' in output else []

    @DocInherit
    def _unknown_lines(self, data):
        """Return a warning heading followed by the dump inside <pre>."""
        return ['<h2>Warning:: Unknown cell</h2>'] + self.in_tag('pre', data)
481 482
482 483
class ConverterLaTeX(Converter):
    """Converts a notebook to a .tex file suitable for pdflatex.

    Note: this converter *needs*:

    - `pandoc`: for all conversion of markdown cells.  If your notebook only
      has Raw cells, pandoc will not be needed.

    - `inkscape`: if your notebook has SVG figures.  These need to be
      converted to PDF before inclusion in the TeX file, as LaTeX doesn't
      understand SVG natively.

    You will in general obtain much better final PDF results if you configure
    the matplotlib backend to create SVG output with

    %config InlineBackend.figure_format = 'svg'

    (or set the equivalent flag at startup or in your configuration profile).
    """
    extension = 'tex'
    documentclass = 'article'
    documentclass_options = '11pt,english'
    # LaTeX sectioning command for each heading level.
    heading_map = {1: r'\section',
                   2: r'\subsection',
                   3: r'\subsubsection',
                   4: r'\paragraph',
                   5: r'\subparagraph',
                   6: r'\subparagraph'}

    @staticmethod
    def _as_lines(text_or_lines):
        """Return a list of lines: a string is wrapped, a list is copied."""
        if isinstance(text_or_lines, basestring):
            return [text_or_lines]
        return list(text_or_lines)

    def in_env(self, environment, lines):
        """Return list of environment lines for input lines

        Parameters
        ----------
        environment : string
          Name of the environment to bracket with begin/end.

        lines : string or list of strings
          Content placed between the begin and end lines.
        """
        out = [u'\\begin{%s}' % environment]
        out.extend(self._as_lines(lines))
        out.append(u'\\end{%s}' % environment)
        return out

    def convert(self):
        """Return the LaTeX source, with a full preamble if enabled."""
        # The main body is done by the logic in the parent class, and that's
        # all we need if preamble support has been turned off.
        body = super(ConverterLaTeX, self).convert()
        if not self.with_preamble:
            return body
        # But if preamble is on, then we need to construct a proper, standalone
        # tex file.

        # Tag the document at the top and set latex class
        final = [r'%% This file was auto-generated by IPython, do NOT edit',
                 r'%% Conversion from the original notebook file:',
                 r'%% {0}'.format(self.infile),
                 r'%%',
                 r'\documentclass[%s]{%s}' % (self.documentclass_options,
                                              self.documentclass),
                 '',
                 ]
        # Load our own preamble, which is stored next to the main file.  The
        # script entry point may be a symlink; realpath resolves it to an
        # absolute location, whereas readlink can return a target that is
        # relative to the link's own directory.
        myfile = os.path.realpath(__file__)
        with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
            final.append(f.read())

        # Load any additional user-supplied preamble
        if self.user_preamble:
            final.extend(['', '%% Adding user preamble from file:',
                          '%% {0}'.format(self.user_preamble), ''])
            with open(self.user_preamble) as f:
                final.append(f.read())

        # Include document body
        final.extend([r'\begin{document}', '',
                      body,
                      r'\end{document}', ''])
        # Return value must be a string
        return '\n'.join(final)

    @DocInherit
    def render_heading(self, cell):
        """Return the heading as the sectioning command for its level."""
        marker = self.heading_map[cell.level]
        return ['%s{%s}' % (marker, cell.source)]

    @DocInherit
    def render_code(self, cell):
        """Return a codecell environment with the input and its outputs."""
        if not cell.input:
            return []

        # Cell codes first carry input code, we use lstlisting for that
        lines = [u'\\begin{codecell}']

        lines.extend(self.in_env('codeinput',
                                 self.in_env('lstlisting', cell.input)))

        outlines = []
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            outlines.extend(conv_fn(output))

        # And then output of many possible types; use a frame for all of it.
        if outlines:
            lines.extend(self.in_env('codeoutput', outlines))

        lines.append(u'\\end{codecell}')

        return lines

    @DocInherit
    def _img_lines(self, img_file):
        """Return a centered includegraphics block for `img_file`."""
        return self.in_env('center',
                           [r'\includegraphics[width=6in]{%s}' % img_file,
                            r'\par'])

    def _svg_lines(self, img_file):
        """Convert the SVG to PDF with inkscape and include the PDF."""
        base_file = os.path.splitext(img_file)[0]
        pdf_file = base_file + '.pdf'
        # NOTE: the module-level `inkscape` is None on OS X when the app
        # bundle is missing; this call then fails.
        subprocess.check_call([inkscape, '--export-pdf=%s' % pdf_file,
                               img_file])
        return self._img_lines(pdf_file)

    @DocInherit
    def render_stream(self, output):
        """Return the stripped stream text in a verbatim environment."""
        lines = []

        if 'text' in output:
            lines.extend(self.in_env('verbatim', output.text.strip()))

        return lines

    @DocInherit
    def render_markdown(self, cell):
        """Return the cell source converted from markdown by pandoc."""
        return [markdown2latex(cell.source)]

    @DocInherit
    def render_pyout(self, output):
        """Return the LaTeX and/or verbatim text representation."""
        lines = []

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            # Extending directly with a string would add one "line" per
            # character, so normalise to a list of lines first.
            lines.extend(self._as_lines(output.latex))

        if 'text' in output:
            lines.extend(self.in_env('verbatim', output.text))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        """Return the traceback, ANSI codes removed, in verbatim."""
        # Note: a traceback is a *list* of frames.
        return self.in_env('traceback',
                           self.in_env('verbatim',
                                       remove_ansi('\n'.join(output.traceback))))

    @DocInherit
    def render_raw(self, cell):
        """Return the raw source, in verbatim if raw_as_verbatim."""
        if self.raw_as_verbatim:
            return self.in_env('verbatim', cell.source)
        else:
            return [cell.source]

    @DocInherit
    def _unknown_lines(self, data):
        """Return a bold warning followed by the dump in verbatim."""
        return [r'{\vspace{5mm}\bf WARNING:: unknown cell:}'] + \
            self.in_env('verbatim', data)
654 655
655 656
class ConverterNotebook(Converter):
    """
    A converter that is essentially a null-op.
    This exists so it can be subclassed
    for custom handlers of .ipynb files
    that create new .ipynb files.

    What distinguishes this from JSONWriter is that
    subclasses can specify what to do with each type of cell.

    Writes out a notebook file.

    """
    extension = 'ipynb'

    def __init__(self, infile, outbase):
        """Set up for `infile`, writing to `outbase` + '.ipynb'."""
        Converter.__init__(self, infile)
        self.outbase = outbase
        # The base class created a '<root>_files' directory; remove it again.
        rmtree(self.files_dir)

    def convert(self):
        """Return the notebook JSON, re-serialised with sorted keys."""
        # Cells are joined with ',' so header + cells + footer form one JSON
        # document, which is parsed and dumped again for uniform formatting.
        return json.dumps(json.loads(Converter.convert(self, ',')),
                          indent=1, sort_keys=True)

    def optional_header(self):
        """Return the JSON lines that open the notebook up to 'cells'."""
        # json.dumps quotes and escapes the name, so an outbase containing
        # quotes or backslashes still yields valid JSON.
        return [
            '{',
            '"metadata": {',
            '"name": %s' % json.dumps(self.outbase),
            '},',
            '"nbformat": 3,',
            '"worksheets": [',
            '{',
            '"cells": [',
        ]

    def optional_footer(self):
        """Return the JSON lines that close the cell list and notebook."""
        return [']', '}', ']', '}']

    @DocInherit
    def render_heading(self, cell):
        """Return the cell serialised as JSON lines."""
        return cell_to_lines(cell)

    @DocInherit
    def render_code(self, cell):
        """Return the cell, outputs included, serialised as JSON lines."""
        return cell_to_lines(cell)

    @DocInherit
    def render_markdown(self, cell):
        """Return the cell serialised as JSON lines."""
        return cell_to_lines(cell)

    @DocInherit
    def render_raw(self, cell):
        """Return the cell serialised as JSON lines."""
        return cell_to_lines(cell)

    @DocInherit
    def render_pyout(self, output):
        """Return the output serialised as JSON lines."""
        # Serialise `output` itself (the old body used an undefined `cell`).
        # Line splitting is skipped because split_lines_cell reads
        # `cell_type`, which outputs presumably do not carry -- TODO confirm.
        return writes_cell(output, split_lines=False).strip().split('\n')

    @DocInherit
    def render_pyerr(self, output):
        """Return the output serialised as JSON lines."""
        # See render_pyout for why line splitting is skipped.
        return writes_cell(output, split_lines=False).strip().split('\n')
723 724
724 725 #-----------------------------------------------------------------------------
725 726 # Standalone conversion functions
726 727 #-----------------------------------------------------------------------------
727 728
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : str
      Path of the rst file to convert.

    Returns
    -------
    newfname : str
      Path of the html file written next to `infile`.

    Raises
    ------
    IOError
      If rst2html cannot be started or writes anything to stderr.
    """

    # This is the set of rst2html options that produces the cleanest,
    # simplest html I could find.  This should help in making it easier to
    # paste into the blogspot html window, though I'm still having problems
    # with linebreaks there...
    #
    # The command is passed as an argument list without a shell, so a file
    # name containing spaces or shell metacharacters is handed to rst2html
    # verbatim instead of being interpreted.
    cmd = ['rst2html', '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments', infile]
    try:
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError as e:
        # Keep reporting failures as IOError, as the shell-based call did
        # via its stderr output.
        raise IOError('could not run rst2html: %s' % e)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find end /div.
    # This may only work if there's a real title defined so we get a 'div class'
    # tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
775 776
776 777 #-----------------------------------------------------------------------------
777 778 # Cell-level functions -- similar to IPython.nbformat.v3.rwbase functions
778 779 # but at cell level instead of whole notebook level
779 780 #-----------------------------------------------------------------------------
780 781
def writes_cell(cell, **kwargs):
    """Serialise a single cell to a unicode JSON string.

    `split_lines` (default True) controls whether multi-line strings are
    first split into lists on a deep copy of the cell.  The remaining
    keyword arguments go to `json.dumps`; `cls`, `indent`, `sort_keys` and
    `separators` are always overridden.
    """
    split = kwargs.pop('split_lines', True)
    kwargs.update(cls=BytesEncoder,
                  indent=3,
                  sort_keys=True,
                  separators=(',', ': '))
    if split:
        # Work on a copy so the caller's cell is left untouched.
        cell = split_lines_cell(copy.deepcopy(cell))
    dumped = json.dumps(cell, **kwargs)
    return py3compat.str_to_unicode(dumped, 'utf-8')
789 790
# Output keys whose string values are split into lists of lines.
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']


def split_lines_cell(cell):
    """
    Split lines within a cell as in
    IPython.nbformat.v3.rwbase.split_lines

    String values are replaced, in place, by lists of lines; the same cell
    object is returned.
    """
    def _split_in_place(node, keys):
        # Replace each string found under `keys` by its list of lines.
        for key in keys:
            value = node.get(key, None)
            if isinstance(value, basestring):
                node[key] = (value + '\n').splitlines()

    if cell.cell_type == 'code':
        if 'input' in cell and isinstance(cell.input, basestring):
            cell.input = (cell.input + '\n').splitlines()
        for output in cell.outputs:
            _split_in_place(output, _multiline_outputs)
    else:  # text, heading cell
        _split_in_place(cell, ['source', 'rendered'])
    return cell
811 812
def cell_to_lines(cell):
    '''
    Write a cell to json, returning the split lines.

    The cell passed in is not modified: writes_cell splits multi-line
    strings on its own deep copy, so no in-place split is done here.
    '''
    s = writes_cell(cell).strip()
    return s.split('\n')
819 820
820 821
# Human-readable list of formats, shown in the help text and in the error
# raised for an unknown format.
known_formats = "rst (default), html, quick-html, latex"


def main(infile, format='rst'):
    """Convert the notebook `infile` to the requested `format`.

    Raises SystemExit when `format` is not one of the known formats.
    """
    # XXX: this is just quick and dirty for now.  When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all else, as well as in the help
    if format in ('rst', 'html'):
        rst_path = ConverterRST(infile).render()
        if format == 'html':
            # Currently, conversion to html is a 2 step process,
            # nb->rst->html
            rst2simplehtml(rst_path)
    elif format == 'quick-html':
        ConverterQuickHTML(infile).render()
    elif format == 'latex':
        ConverterLaTeX(infile).render()
    else:
        raise SystemExit("Unknown format '%s', " % format +
                         "known formats are: " + known_formats)
845 846
846 847 #-----------------------------------------------------------------------------
847 848 # Script main
848 849 #-----------------------------------------------------------------------------
849 850
if __name__ == '__main__':
    # Command line: one positional notebook file plus an optional format.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    # Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    parser.add_argument(
        '-f', '--format', default='rst',
        help='Output format. Supported formats: \n' + known_formats)
    args = parser.parse_args()
    # nargs=1 stores the file name in a one-element list.
    main(infile=args.infile[0], format=args.format)
General Comments 0
You need to be logged in to leave comments. Login now