##// END OF EJS Templates
Updates to markdown conversion.
Fernando Perez -
Show More
@@ -1,1143 +1,1208 b''
1 1 #!/usr/bin/env python
2 2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
3 3
4 4 Example:
5 5 ./nbconvert.py --format html file.ipynb
6 6
7 7 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 8 called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
9 9 use '--format quick-html' which will do ipynb -> html, but won't look as
10 10 pretty.
11 11 """
12 12 #-----------------------------------------------------------------------------
13 13 # Imports
14 14 #-----------------------------------------------------------------------------
15 15 from __future__ import print_function
16 16
17 17 # Stdlib
18 18 import codecs
19 19 import logging
20 20 import os
21 21 import pprint
22 22 import re
23 23 import subprocess
24 24 import sys
25 25 import json
26 26 import copy
27 27 from shutil import rmtree
28 28
# Name/path of the Inkscape executable, or None when it cannot be located.
# On OS X the binary lives inside the application bundle rather than on the
# PATH, so the bundle location is probed explicitly; elsewhere the bare
# command name is used as-is.
# NOTE(review): the nesting of the existence check under the darwin branch is
# reconstructed (indentation was lost in this view) -- confirm.
if sys.platform != 'darwin':
    inkscape = 'inkscape'
else:
    inkscape = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
    if not os.path.exists(inkscape):
        inkscape = None
34 34
35 35 # From IPython
36 36 from IPython.external import argparse
37 37 from IPython.nbformat import current as nbformat
38 38 from IPython.utils.text import indent
39 39 from decorators import DocInherit
40 40 from IPython.nbformat.v3.nbjson import BytesEncoder
41 41 from IPython.utils import py3compat
42 42
43 43 #-----------------------------------------------------------------------------
44 44 # Utility functions
45 45 #-----------------------------------------------------------------------------
46 46
def DocInherit(f):
    """No-op decorator: return the decorated callable ``f`` untouched.

    NOTE(review): this module-level definition rebinds the ``DocInherit``
    name imported from ``decorators`` earlier in the file, so every
    ``@DocInherit`` use below is effectively the identity -- confirm that
    this is intended.
    """
    return f
49 49
def remove_fake_files_url(cell):
    """Drop every '/files/' pseudo-path occurrence from ``cell.source``.

    The cell is modified in place; nothing is returned.
    """
    cell.source = cell.source.replace('/files/', '')
55 55
56 56
def remove_ansi(src):
    """Strip all ANSI color (SGR) escape sequences from input string.

    Any sequence of the form ``ESC [ <params> m`` is removed, where
    ``<params>`` is a possibly empty, semicolon-separated run of digits.
    This covers short forms such as ``ESC[m`` / ``ESC[31m`` as well as
    multi-attribute forms such as ``ESC[1;31;40m``.

    Parameters
    ----------
    src : string

    Returns
    -------
    string
        ``src`` with the color escape sequences removed.
    """
    return re.sub(r'\033\[[0-9;]*m', '', src)
69 69
70 70
71 71 # Pandoc-dependent code
def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed.

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
        Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
        Output as returned by pandoc, decoded from UTF-8.
    """
    # stderr must be piped explicitly, otherwise communicate() always
    # returns None for it and the reporting branch below can never run.
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate(src.encode('utf-8'))
    if err:
        print(err.decode('utf-8', 'replace'), file=sys.stderr)
    return out.decode('utf-8')
96 96
97 97
def markdown2rst(src):
    """Convert a markdown string to reStructuredText via pandoc.

    This function will raise an error if pandoc is not installed.

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
        Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
        Output as returned by pandoc, decoded from UTF-8.
    """
    # stderr must be piped explicitly, otherwise communicate() always
    # returns None for it and the reporting branch below can never run.
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'rst'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate(src.encode('utf-8'))
    if err:
        print(err.decode('utf-8', 'replace'), file=sys.stderr)
    return out.decode('utf-8')
122 122
123 123
def rst_directive(directive, text=''):
    """Return the lines of an RST directive with an optional indented body.

    The result always starts with ``directive`` followed by an empty line;
    when ``text`` is non-empty, the indented text and another empty line are
    appended.
    """
    body = [indent(text), ''] if text else []
    return [directive, ''] + body
129 129
130 130 #-----------------------------------------------------------------------------
131 131 # Class declarations
132 132 #-----------------------------------------------------------------------------
133 133
class ConversionException(Exception):
    """Exception type reserved for notebook conversion failures."""
136 136
137 137
class Converter(object):
    """Base class for notebook converters.

    Subclasses set ``extension`` and implement the ``render_*`` hooks (and
    ``_img_lines`` / ``_unknown_lines``); this class provides reading the
    notebook, walking its cells, writing figure files and saving the result.
    Every ``render_*`` hook returns a list of output lines.
    """
    # Encoding used when writing the output file and text figures.
    default_encoding = 'utf-8'
    # File extension (without dot) of the generated output.
    extension = str()
    # Number of figure files written so far; used to number them.
    figures_counter = 0
    # Path of the input notebook and the pieces derived from it.
    infile = str()
    infile_dir = str()
    infile_root = str()
    # Directory receiving the extracted figure files.
    files_dir = str()
    with_preamble = True
    user_preamble = None
    # Converted document, filled in by render().
    output = str()
    # When true, subclasses render raw cells as verbatim text.
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the input path and create ``<root>_files`` next to it."""
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        infile_root = os.path.splitext(infile_root)[0]
        files_dir = os.path.join(self.infile_dir, infile_root + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, infile_root)

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``render_unknown`` when no such method exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """return output_type dependent render method, for example
        render_display_format_text

        Falls back to ``render_unknown`` when no such method exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown)

    def convert(self, cell_separator='\n'):
        """Convert every cell of ``self.nb`` and return the whole document.

        Rendered cells are joined with ``cell_separator`` and wrapped between
        the optional header and footer; the result is one unicode string.
        """
        lines = []
        lines.extend(self.optional_header())
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        lines.extend(cell_lines)
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to disk and return the absolute output path.

        ``outfile`` defaults to ``self.outbase`` plus ``self.extension``;
        ``encoding`` defaults to ``self.default_encoding``.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with open(outfile, 'w') as f:
            f.write(self.output.encode(encoding))
        return os.path.abspath(outfile)

    def optional_header(self):
        """Return the list of lines emitted before the converted cells."""
        return []

    def optional_footer(self):
        """Return the list of lines emitted after the converted cells."""
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is written into ``self.files_dir`` and the figure counter is
        incremented.

        Returns the path of the new file (``self.files_dir`` joined with the
        generated figure name).
        """
        # Local import: keeps this block self-contained.
        import base64

        figname = '%s_fig_%02i.%s' % (self.infile_root,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            # b64decode replaces the Python 2-only ``str.decode('base64')``.
            data = base64.b64decode(data)
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Image formats are written to figure files and referenced through
        ``_<fmt>_lines`` (or ``_img_lines``); every other key except
        'output_type' goes through ``dispatch_display_format``.

        Returns list.
        """
        lines = []

        for fmt in output.keys():
            if fmt in ['png', 'svg', 'jpg', 'pdf']:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))
            elif fmt != 'output_type':
                conv_fn = self.dispatch_display_format(fmt)
                lines.extend(conv_fn(output))
        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        The pretty-printed cell is logged as a warning and handed to
        ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell:\n%s' % data)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
366 366
class ConverterRST(Converter):
    """Converter producing reStructuredText ('.rst') output."""
    extension = 'rst'
    # Underline character used for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        # The title is underlined with as many markers as it has characters.
        underline = self.heading_level[cell.level] * len(cell.source)
        return ['{0}\n{1}\n'.format(cell.source, underline)]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines += rst_directive('.. code:: python', cell.input)

        for output in cell.outputs:
            render = self.dispatch(output.output_type)
            lines += render(output)

        return lines

    @DocInherit
    def render_markdown(self, cell):
        # Markdown is translated to RST through pandoc.
        return [markdown2rst(cell.source)]

    @DocInherit
    def render_raw(self, cell):
        if not self.raw_as_verbatim:
            return [cell.source]
        return ['::', '', indent(cell.source), '']

    @DocInherit
    def render_pyout(self, output):
        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key; latex is
        # emitted before text when both are present.
        for key, directive in (('latex', '.. math::'),
                               ('text', '.. parsed-literal::')):
            if key in output:
                lines.extend(rst_directive(directive, getattr(output, key)))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        traceback_text = remove_ansi('\n'.join(output.traceback))
        return ['::', '', indent(traceback_text), '']

    @DocInherit
    def _img_lines(self, img_file):
        return ['.. image:: %s' % img_file, '']

    @DocInherit
    def render_display_format_text(self, output):
        return rst_directive('.. parsed-literal::', output.text)

    @DocInherit
    def _unknown_lines(self, data):
        warning = rst_directive('.. warning:: Unknown cell')
        return warning + [data]

    def render_display_format_html(self, output):
        """render the html part of an output as a raw html directive

        Returns list.
        """
        return rst_directive('.. raw:: html', output.html)

    def render_display_format_latex(self, output):
        """render the latex part of an output as a math directive

        Returns list.
        """
        return rst_directive('.. math::', output.latex)

    def render_display_format_json(self, output):
        """render the json part of an output as a raw json directive

        Returns list.
        """
        return rst_directive('.. raw:: json', output.json)

    def render_display_format_javascript(self, output):
        """render the javascript part of an output as a raw directive

        Returns list.
        """
        return rst_directive('.. raw:: javascript', output.javascript)
461 461
462
463
def highlight(src, lang='python'):
    """Return a syntax-highlighted HTML rendering of the input source.

    ``lang`` is the pygments lexer name used for the highlighting.
    """
    # pygments is imported lazily, so it is only needed when this is called.
    from pygments import highlight as pygments_highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import get_lexer_by_name

    formatter = HtmlFormatter()
    lexer = get_lexer_by_name(lang, stripall=True)
    return pygments_highlight(src, lexer, formatter)
473
474
class ConverterMarkdown(Converter):
    """Converter producing Markdown ('.md') output."""
    extension = 'md'

    def __init__(self, infile, highlight_source=False):
        """Set up the converter.

        When ``highlight_source`` is true, code input is passed through
        ``highlight`` instead of being emitted as an indented block.
        """
        super(ConverterMarkdown, self).__init__(infile)
        self.highlight_source = highlight_source

    @DocInherit
    def render_heading(self, cell):
        hashes = '#' * cell.level
        return ['{0} {1}'.format(hashes, cell.source), '']

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        if self.highlight_source:
            src = highlight(cell.input)
        else:
            src = indent(cell.input)

        lines = ['*In[%s]:*' % cell.prompt_number, '', src, '']
        if cell.outputs:
            lines += ['==>', '']
        for output in cell.outputs:
            render = self.dispatch(output.output_type)
            lines.extend(render(output))

        lines.append('')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        # Markdown cells are passed through verbatim.
        return [cell.source, '']

    @DocInherit
    def render_raw(self, cell):
        body = indent(cell.source) if self.raw_as_verbatim else cell.source
        return [body, '']

    @DocInherit
    def render_pyout(self, output):
        # output is a dictionary like object with type as a key.  Only the
        # text representation is emitted; a latex entry is ignored.
        lines = []
        if 'text' in output:
            lines += ['<pre>', indent(output.text), '</pre>']
        lines.append('')
        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        traceback_text = remove_ansi('\n'.join(output.traceback))
        return [indent(traceback_text), '']

    @DocInherit
    def _img_lines(self, img_file):
        return ['', '![image](%s)' % img_file, '']

    @DocInherit
    def render_display_format_text(self, output):
        return [indent(output.text)]

    @DocInherit
    def _unknown_lines(self, data):
        return ['Warning: Unknown cell', data]

    def render_display_format_html(self, output):
        """render the html part of an output, passed through unchanged

        Returns list.
        """
        return [output.html]

    def render_display_format_latex(self, output):
        """render the latex part of an output as a labelled, indented block

        Returns list.
        """
        return ['LaTeX::', indent(output.latex)]

    def render_display_format_json(self, output):
        """render the json part of an output as a labelled, indented block

        Returns list.
        """
        return ['JSON:', indent(output.json)]

    def render_display_format_javascript(self, output):
        """render the javascript part of an output as a labelled block

        Returns list.
        """
        return ['JavaScript:', indent(output.javascript)]
560 580
def return_list(x):
    """Return ``x`` itself when it is a list, else a new list holding ``x``."""
    if isinstance(x, list):
        return x
    return [x]
584
585
class ConverterQuickHTML(Converter):
    """Converter producing a minimal, unstyled HTML ('.html') document."""
    extension = 'html'

    def in_tag(self, tag, src):
        """Return a list of elements bracketed by the given tag"""
        return ['<%s>' % tag, src, '</%s>' % tag]

    def optional_header(self):
        """Return the opening lines of the HTML document."""
        # XXX: inject the IPython standard CSS into here
        s = """<html>
<head>
</head>

<body>
"""
        return s.splitlines()

    def optional_footer(self):
        """Return the closing lines of the HTML document."""
        s = """</body>
</html>
"""
        return s.splitlines()

    @DocInherit
    def render_heading(self, cell):
        marker = cell.level
        return ['<h{1}>\n {0}\n</h{1}>'.format(cell.source, marker)]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        lines = ['<table>']
        lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>' % cell.prompt_number)
        lines.append("<br>\n".join(cell.input.splitlines()))
        lines.append('</tt></td></tr>')

        # Each output gets its own table row.
        for output in cell.outputs:
            lines.append('<tr><td></td><td>')
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))
            lines.append('</td></tr>')

        lines.append('</table>')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        # Markdown is not rendered, only shown preformatted.
        return self.in_tag('pre', cell.source)

    @DocInherit
    def render_raw(self, cell):
        if self.raw_as_verbatim:
            return self.in_tag('pre', cell.source)
        else:
            return [cell.source]

    @DocInherit
    def render_pyout(self, output):
        lines = ['<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>' %
                 output.prompt_number, '<td>']

        # output is a dictionary like object with type as a key
        for out_type in ('text', 'latex'):
            if out_type in output:
                lines.extend(self.in_tag('pre', indent(output[out_type])))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        return self.in_tag('pre', remove_ansi('\n'.join(output.traceback)))

    @DocInherit
    def _img_lines(self, img_file):
        return ['<img src="%s">' % img_file, '']

    @DocInherit
    def _unknown_lines(self, data):
        return ['<h2>Warning:: Unknown cell</h2>'] + self.in_tag('pre', data)

    # NOTE: the render_display_format_* methods below must *return* the
    # list; callers extend their line list with the result, so an implicit
    # None would raise there.

    @DocInherit
    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        return return_list(output.text)

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        return return_list(output.html)

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns [].
        """
        # quickhtml ignores latex
        return []

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns [].
        """
        # quickhtml ignores json
        return []

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        return return_list(output.javascript)
693 712
class ConverterLaTeX(Converter):
    """Converts a notebook to a .tex file suitable for pdflatex.

    Note: this converter *needs*:

    - `pandoc`: for all conversion of markdown cells. If your notebook only
      has Raw cells, pandoc will not be needed.

    - `inkscape`: if your notebook has SVG figures. These need to be
      converted to PDF before inclusion in the TeX file, as LaTeX doesn't
      understand SVG natively.

    You will in general obtain much better final PDF results if you configure
    the matplotlib backend to create SVG output with

    %config InlineBackend.figure_format = 'svg'

    (or set the equivalent flag at startup or in your configuration profile).
    """
    extension = 'tex'
    documentclass = 'article'
    documentclass_options = '11pt,english'
    # LaTeX sectioning command used for each heading level.
    heading_map = {1: r'\section',
                   2: r'\subsection',
                   3: r'\subsubsection',
                   4: r'\paragraph',
                   5: r'\subparagraph',
                   6: r'\subparagraph'}

    def in_env(self, environment, lines):
        """Return list of environment lines for input lines

        Parameters
        ----------
        environment : string
            Name of the environment to bracket with begin/end.

        lines : string or list of strings
            Body of the environment.  A list (or tuple) contributes one
            entry per element; anything else is added as a single entry.

        Returns
        -------
        list
            ``\\begin{environment}``, the body, ``\\end{environment}``.
        """
        out = [u'\\begin{%s}' % environment]
        if isinstance(lines, (list, tuple)):
            out.extend(lines)
        else:  # a single string
            out.append(lines)
        out.append(u'\\end{%s}' % environment)
        return out

    def convert(self):
        """Return the LaTeX document as one string.

        With ``with_preamble`` off only the converted body is returned;
        otherwise the body is wrapped into a standalone document that
        includes 'preamble.tex' (stored next to this file) and the optional
        user preamble file.
        """
        # The main body is done by the logic in the parent class, and that's
        # all we need if preamble support has been turned off.
        body = super(ConverterLaTeX, self).convert()
        if not self.with_preamble:
            return body
        # But if preamble is on, then we need to construct a proper, standalone
        # tex file.

        # Tag the document at the top and set latex class
        final = [r'%% This file was auto-generated by IPython, do NOT edit',
                 r'%% Conversion from the original notebook file:',
                 r'%% {0}'.format(self.infile),
                 r'%%',
                 r'\documentclass[%s]{%s}' % (self.documentclass_options,
                                              self.documentclass),
                 '',
                 ]
        # Load our own preamble, which is stored next to the main file. We
        # need to be careful in case the script entry point is a symlink;
        # realpath also copes with relative and chained links, which a bare
        # readlink does not.
        myfile = os.path.realpath(__file__)
        with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
            final.append(f.read())

        # Load any additional user-supplied preamble
        if self.user_preamble:
            final.extend(['', '%% Adding user preamble from file:',
                          '%% {0}'.format(self.user_preamble), ''])
            with open(self.user_preamble) as f:
                final.append(f.read())

        # Include document body
        final.extend([r'\begin{document}', '',
                      body,
                      r'\end{document}', ''])
        # Return value must be a string
        return '\n'.join(final)

    @DocInherit
    def render_heading(self, cell):
        marker = self.heading_map[cell.level]
        return ['%s{%s}' % (marker, cell.source)]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        # Cell codes first carry input code, we use lstlisting for that
        lines = [u'\\begin{codecell}']

        lines.extend(self.in_env('codeinput',
                                 self.in_env('lstlisting', cell.input)))

        outlines = []
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            outlines.extend(conv_fn(output))

        # And then output of many possible types; use a frame for all of it.
        if outlines:
            lines.extend(self.in_env('codeoutput', outlines))

        lines.append(u'\\end{codecell}')

        return lines

    @DocInherit
    def _img_lines(self, img_file):
        return self.in_env('center',
                           [r'\includegraphics[width=6in]{%s}' % img_file,
                            r'\par'])

    def _svg_lines(self, img_file):
        """Convert an SVG figure to PDF with inkscape and include the PDF.

        Raises ConversionException when no inkscape executable is known.
        """
        if inkscape is None:
            raise ConversionException(
                'inkscape is required to convert SVG figures to PDF')
        base_file = os.path.splitext(img_file)[0]
        pdf_file = base_file + '.pdf'
        subprocess.check_call([inkscape, '--export-pdf=%s' % pdf_file,
                               img_file])
        return self._img_lines(pdf_file)

    @DocInherit
    def render_markdown(self, cell):
        # Markdown is translated to LaTeX through pandoc.
        return [markdown2latex(cell.source)]

    @DocInherit
    def render_pyout(self, output):
        lines = []

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            # Go through the list-normalising helper: extending directly
            # with a plain string would add one entry per character.
            lines.extend(self.render_display_format_latex(output))

        if 'text' in output:
            lines.extend(self.in_env('verbatim', output.text))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        return self.in_env('traceback',
                           self.in_env('verbatim',
                                       remove_ansi('\n'.join(output.traceback))))

    @DocInherit
    def render_raw(self, cell):
        if self.raw_as_verbatim:
            return self.in_env('verbatim', cell.source)
        else:
            return [cell.source]

    @DocInherit
    def _unknown_lines(self, data):
        return [r'{\vspace{5mm}\bf WARNING:: unknown cell:}'] + \
            self.in_env('verbatim', data)

    @DocInherit
    def render_display_format_text(self, output):
        lines = []

        if 'text' in output:
            lines.extend(self.in_env('verbatim', output.text.strip()))

        return lines

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns [].
        """
        # latex ignores html
        return []

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list: ``output.latex`` itself when it already is a list,
        otherwise a one-element list holding it.
        """
        if isinstance(output.latex, list):
            return output.latex
        return [output.latex]

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns [].
        """
        # latex ignores json
        return []

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns [].
        """
        # latex ignores javascript
        return []
class ConverterNotebook(Converter):
    """
    A converter that is essentially a null-op.
    This exists so it can be subclassed
    for custom handlers of .ipynb files
    that create new .ipynb files.

    What distinguishes this from JSONWriter is that
    subclasses can specify what to do with each type of cell.

    Writes out a notebook file.

    """
    extension = 'ipynb'

    def __init__(self, infile, outbase):
        """Set up the converter; output goes to ``outbase`` + '.ipynb'."""
        Converter.__init__(self, infile)
        self.outbase = outbase
        # NOTE(review): this removes the '<root>_files' directory even when
        # it already existed before the base constructor ran -- confirm
        # that discarding pre-existing content is intended.
        rmtree(self.files_dir)

    def convert(self):
        """Return the notebook as normalised JSON text.

        Cells are joined with ',' between the JSON header and footer; the
        result is re-parsed and re-serialised with sorted keys.
        """
        return json.dumps(json.loads(Converter.convert(self, ',')),
                          indent=1, sort_keys=True)

    def optional_header(self):
        """Return the lines opening the notebook JSON document."""
        # json.dumps quotes and escapes the name, so quotes or backslashes
        # in ``outbase`` cannot corrupt the document.
        return [
            '{',
            ' "metadata": {',
            '  "name": %s' % json.dumps(self.outbase),
            ' },',
            ' "nbformat": 3,',
            ' "worksheets": [',
            '  {',
            '   "cells": [',
        ]

    def optional_footer(self):
        """Return the lines closing the notebook JSON document."""
        return [
            ']',
            '  }',
            ' ]',
            '}',
        ]

    @DocInherit
    def render_heading(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_code(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_markdown(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_raw(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_pyout(self, output):
        # Serialise the node that was passed in (the parameter is named
        # ``output``; there is no ``cell`` in this scope).
        return cell_to_lines(output)

    @DocInherit
    def render_pyerr(self, output):
        # Serialise the node that was passed in (the parameter is named
        # ``output``; there is no ``cell`` in this scope).
        return cell_to_lines(output)

    @DocInherit
    def render_display_format_text(self, output):
        return [output.text]

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        return [output.html]

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        return [output.latex]

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        return [output.json]

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        return [output.javascript]
1001 1020 #-----------------------------------------------------------------------------
1002 1021 # Standalone conversion functions
1003 1022 #-----------------------------------------------------------------------------
1004 1023
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : string
        Path of the rst file to convert.

    Returns
    -------
    newfname : string
        Path of the html file written: ``infile`` with its extension
        replaced by '.html'.

    Raises
    ------
    IOError
        If rst2html writes anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find. This should help in making it easier to paste into
    # the blogspot html window, though I'm still having problems with
    # linebreaks there...
    # The command is an argument list run without a shell, so a file name
    # containing spaces or shell metacharacters is passed through verbatim.
    cmd = ['rst2html', '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments', infile]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state. Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find the
    # closing body tag.
    # This may only work if there's a real title defined so we get a 'div class'
    # tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
1052 1071
1072
def md2html(infile):
    """Convert a markdown file to a standalone html page.

    Runs the external ``markdown`` command on `infile`, embeds its output
    together with the pygments '.highlight' style definitions in a minimal
    html document, and writes that document next to the input file.

    Returns the name of the html file written (`infile` with its extension
    replaced by '.html').  Raises IOError if ``markdown`` writes anything to
    stderr.
    """

    markdown_proc = subprocess.Popen(['markdown', infile],
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
    html, errors = markdown_proc.communicate()
    if errors:
        raise IOError(errors)

    # Imported here so pygments is only needed when this conversion runs.
    from pygments.formatters import HtmlFormatter
    css = HtmlFormatter().get_style_defs('.highlight')

    template = """
<!DOCTYPE HTML>
<html>

<head>
<title>{infile}</title>

<style type="text/css">
{css}
</style>

</head>

<body>
{html}
</body>

</html>
"""
    page = template.format(infile=infile, css=css, html=html)

    base, _ext = os.path.splitext(infile)
    newfname = base + '.html'
    with open(newfname, 'w') as out:
        out.write(page)

    return newfname
1113
1053 1114 #-----------------------------------------------------------------------------
1054 1115 # Cell-level functions -- similar to IPython.nbformat.v3.rwbase functions
1055 1116 # but at cell level instead of whole notebook level
1056 1117 #-----------------------------------------------------------------------------
1057 1118
def writes_cell(cell, **kwargs):
    """Serialize a single cell to a unicode JSON string.

    Extra keyword arguments are forwarded to ``json.dumps``; the encoder
    class, indentation, key sorting and separators are always overridden
    here.  Unless ``split_lines=False`` is given, a deep copy of the cell is
    passed through split_lines_cell() first, so the caller's cell is not
    modified by this function.
    """
    kwargs.update(cls=BytesEncoder,
                  indent=3,
                  sort_keys=True,
                  separators=(',', ': '))
    # 'split_lines' is our own option and must not reach json.dumps.
    want_split = kwargs.pop('split_lines', True)
    if want_split:
        cell = split_lines_cell(copy.deepcopy(cell))
    serialized = json.dumps(cell, **kwargs)
    return py3compat.str_to_unicode(serialized, 'utf-8')
1066 1127
1128
# Output keys whose string values are split into lists of lines.
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']

# ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere so
# the isinstance checks below work on both major versions.
try:
    _string_types = basestring
except NameError:
    _string_types = str


def _as_lines(text):
    """Return `text` as a list of lines (a newline is appended first)."""
    return (text + '\n').splitlines()


def split_lines_cell(cell):
    """
    Split lines within a cell as in
    IPython.nbformat.v3.rwbase.split_lines

    For a code cell the ``input`` and every multi-line output field listed
    in `_multiline_outputs` are converted; for any other cell type the
    ``source`` and ``rendered`` fields are.  Only string values are touched,
    so values that are already lists are left alone.

    The cell is modified in place and also returned.
    """
    if cell.cell_type == 'code':
        if 'input' in cell and isinstance(cell.input, _string_types):
            cell.input = _as_lines(cell.input)
        for output in cell.outputs:
            for key in _multiline_outputs:
                item = output.get(key, None)
                if isinstance(item, _string_types):
                    output[key] = _as_lines(item)
    else:  # text, heading cell
        for key in ('source', 'rendered'):
            item = cell.get(key, None)
            if isinstance(item, _string_types):
                cell[key] = _as_lines(item)
    return cell
1088 1152
1153
def cell_to_lines(cell):
    '''
    Serialize a cell to json and return the result as a list of lines.

    The cell passed in is split in place by split_lines_cell() before it is
    serialized.
    '''
    split_lines_cell(cell)
    return writes_cell(cell).strip().split('\n')
1096 1161
1097 1162
known_formats = "rst (default), html, quick-html, latex, markdown"


def main(infile, format='rst'):
    """Convert the notebook `infile` to the requested output `format`.

    'rst', 'markdown', 'quick-html' and 'latex' are each rendered by a
    single converter.  'html' is a two step process: the notebook is first
    rendered to markdown and that file is then passed to md2html().  Any
    other value exits with a message listing `known_formats`.
    """
    # XXX: this is just quick and dirty for now.  When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all else, as well as in the help
    if format == 'html':
        # Currently, conversion to html is a 2 step process, nb->md->html
        md_converter = ConverterMarkdown(infile, True)
        md2html(md_converter.render())
        return

    if format == 'rst':
        converter_class = ConverterRST
    elif format == 'markdown':
        converter_class = ConverterMarkdown
    elif format == 'quick-html':
        converter_class = ConverterQuickHTML
    elif format == 'latex':
        converter_class = ConverterLaTeX
    else:
        raise SystemExit("Unknown format '%s', " % format +
                         "known formats are: " + known_formats)

    converter_class(infile).render()
1125 1190
1126 1191 #-----------------------------------------------------------------------------
1127 1192 # Script main
1128 1193 #-----------------------------------------------------------------------------
1129 1194
if __name__ == '__main__':
    # Command-line entry point: one positional notebook filename plus an
    # optional output format, handed straight to main().
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    # Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    format_help = 'Output format. Supported formats: \n' + known_formats
    parser.add_argument('-f', '--format', default='rst', help=format_help)

    args = parser.parse_args()
    # nargs=1 makes args.infile a one-element list.
    main(infile=args.infile[0], format=args.format)
General Comments 0
You need to be logged in to leave comments. Login now