##// END OF EJS Templates
Cleaning up markdown output.
Brian Granger -
Show More
@@ -1,1208 +1,1216 b''
1 1 #!/usr/bin/env python
2 2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
3 3
4 4 Example:
5 5 ./nbconvert.py --format html file.ipynb
6 6
7 7 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 8 called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
9 9 use '--format quick-html' which will do ipynb -> html, but won't look as
10 10 pretty.
11 11 """
12 12 #-----------------------------------------------------------------------------
13 13 # Imports
14 14 #-----------------------------------------------------------------------------
15 15 from __future__ import print_function
16 16
17 17 # Stdlib
18 18 import codecs
19 19 import logging
20 20 import os
21 21 import pprint
22 22 import re
23 23 import subprocess
24 24 import sys
25 25 import json
26 26 import copy
27 27 from shutil import rmtree
28 28
# Executable used to convert SVG figures to PDF.  Outside OS X the bare command
# name is used as-is; on OS X the application-bundle binary is used when it
# exists on disk, otherwise the value is None.
if sys.platform != 'darwin':
    inkscape = 'inkscape'
elif os.path.exists('/Applications/Inkscape.app/Contents/Resources/bin/inkscape'):
    inkscape = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
else:
    inkscape = None
34 34
35 35 # From IPython
36 36 from IPython.external import argparse
37 37 from IPython.nbformat import current as nbformat
38 38 from IPython.utils.text import indent
39 39 from decorators import DocInherit
40 40 from IPython.nbformat.v3.nbjson import BytesEncoder
41 41 from IPython.utils import py3compat
42 42
43 43 #-----------------------------------------------------------------------------
44 44 # Utility functions
45 45 #-----------------------------------------------------------------------------
46 46
def DocInherit(f):
    """No-op decorator: hand the decorated callable back unchanged.

    This definition comes after ``from decorators import DocInherit`` and
    therefore replaces the imported name for the rest of the module.
    """
    return f
49 49
def remove_fake_files_url(cell):
    """Delete every '/files/' pseudo-path occurrence from ``cell.source``.

    The cell is modified in place; nothing is returned.
    """
    cell.source = cell.source.replace('/files/', '')
55 55
56 56
def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    A color sequence is ``ESC [`` followed by zero or more numeric
    parameters separated by ``;`` and terminated by ``m`` (for example
    ``ESC[0m``, ``ESC[31m``, ``ESC[0;31m`` or ``ESC[1;32;40m``).

    Parameters
    ----------
    src : string

    Returns
    -------
    string
        `src` with every such sequence removed.
    """
    # The previous pattern only recognised 'ESC[0m' and 'ESC[d;ddm', so other
    # common color codes were left in the output.
    return re.sub(r'\033\[[0-9;]*m', '', src)
69 69
70 70
71 71 # Pandoc-dependent code
def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed.

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc, decoded from UTF-8.
    """
    # stderr must be piped explicitly: without it communicate() always returns
    # None for `err` and the reporting branch below can never run.
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate(src.encode('utf-8'))
    if err:
        # On Python 2 the captured bytes are already `str`; otherwise decode
        # them so the message is printed as text rather than as a bytes repr.
        if not isinstance(err, str):
            err = err.decode('utf-8', 'replace')
        print(err, file=sys.stderr)
    return out.decode('utf-8')
96 96
97 97
def markdown2rst(src):
    """Convert a markdown string to reStructuredText via pandoc.

    This function will raise an error if pandoc is not installed.

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc, decoded from UTF-8.
    """
    # stderr must be piped explicitly: without it communicate() always returns
    # None for `err` and the reporting branch below can never run.
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'rst'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate(src.encode('utf-8'))
    if err:
        # On Python 2 the captured bytes are already `str`; otherwise decode
        # them so the message is printed as text rather than as a bytes repr.
        if not isinstance(err, str):
            err = err.decode('utf-8', 'replace')
        print(err, file=sys.stderr)
    return out.decode('utf-8')
122 122
123 123
def rst_directive(directive, text=''):
    """Return the lines of an RST directive with an optional indented body.

    The result is always ``[directive, '']``; when `text` is non-empty the
    indented text and another empty line are appended.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
129 129
130 130 #-----------------------------------------------------------------------------
131 131 # Class declarations
132 132 #-----------------------------------------------------------------------------
133 133
class ConversionException(Exception):
    """Exception subclass for conversion errors; adds nothing to Exception."""
136 136
137 137
class Converter(object):
    """Base class shared by the notebook converters.

    `read` loads the notebook, `convert` walks its worksheets and cells and
    dispatches each cell to a ``render_<cell_type>`` method, and `save` writes
    the resulting text next to the input file.  Subclasses set `extension`
    and implement the ``render_*`` / ``_*_lines`` hooks; every hook returns a
    list of output lines.
    """
    default_encoding = 'utf-8'
    extension = str()
    figures_counter = 0
    infile = str()
    infile_dir = str()
    infile_root = str()
    files_dir = str()
    with_preamble = True
    user_preamble = None
    output = str()
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the paths derived from `infile` and create the figure dir.

        The figure directory is ``<input name without extension>_files`` in
        the directory of the input file; it is created when missing.
        """
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        infile_root = os.path.splitext(infile_root)[0]
        files_dir = os.path.join(self.infile_dir, infile_root + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, infile_root)

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to `render_unknown` when no such method exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """return output_type dependent render method, for example render_output_text

        Falls back to `render_unknown` when no such method exists.
        """
        return getattr(self, 'render_display_format_' + format, self.render_unknown)

    def convert(self, cell_separator='\n'):
        """Render every cell of every worksheet and return one unicode string.

        The lines of `optional_header` come first, then the rendered cells
        joined by `cell_separator`, then the lines of `optional_footer`.
        """
        lines = []
        lines.extend(self.optional_header())
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        lines.extend(cell_lines)
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write self.output to `outfile` and return its absolute path.

        `outfile` defaults to ``self.outbase + '.' + self.extension`` and
        `encoding` to ``self.default_encoding``.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The payload is already encoded to bytes, so the file has to be
        # opened in binary mode.
        with open(outfile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return os.path.abspath(outfile)

    def optional_header(self):
        """Return the list of lines emitted before the cells (none here)."""
        return []

    def optional_footer(self):
        """Return the list of lines emitted after the cells (none here)."""
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named ``<infile_root>_fig_<NN>.<fmt>`` inside
        ``self.files_dir`` and the figure counter is incremented.

        Returns the path of the new file (``self.files_dir`` joined with the
        figure name).
        """
        figname = '%s_fig_%02i.%s' % (self.infile_root,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            # The 'base64' string codec only exists on Python 2; the base64
            # module performs the same decoding everywhere.
            import base64
            data = base64.b64decode(data)
            f = open(fullname, 'wb')
        else:
            f = codecs.open(fullname, 'wb', self.default_encoding)

        with f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Image formats are written to figure files and rendered through
        ``_<fmt>_lines`` (or `_img_lines`); every other key except
        'output_type' goes through `dispatch_display_format`.

        Returns list.
        """
        lines = []

        for fmt in output.keys():
            if fmt in ['png', 'svg', 'jpg', 'pdf']:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))
            elif fmt != 'output_type':
                conv_fn = self.dispatch_display_format(fmt)
                lines.extend(conv_fn(output))
        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        The pretty-printed cell is logged as a warning and passed to
        `_unknown_lines`.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell:\n%s' % data)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
365 365
366 366
class ConverterRST(Converter):
    """Converter that renders a notebook as reStructuredText."""
    extension = 'rst'
    # Underline character used for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: "'", 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        underline_char = self.heading_level[cell.level]
        title = cell.source
        return ['{0}\n{1}\n'.format(title, underline_char * len(title))]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        rendered = ['In[%s]:' % cell.prompt_number, '']
        rendered += rst_directive('.. code:: python', cell.input)

        for out in cell.outputs:
            rendered += self.dispatch(out.output_type)(out)

        return rendered

    @DocInherit
    def render_markdown(self, cell):
        # Markdown is translated to RST by pandoc.
        return [markdown2rst(cell.source)]

    @DocInherit
    def render_raw(self, cell):
        if not self.raw_as_verbatim:
            return [cell.source]
        return ['::', '', indent(cell.source), '']

    @DocInherit
    def render_pyout(self, output):
        rendered = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key; latex is
        # emitted before text when both are present.
        for key, directive in (('latex', '.. math::'),
                               ('text', '.. parsed-literal::')):
            if key in output:
                rendered.extend(rst_directive(directive, getattr(output, key)))

        return rendered

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        traceback_text = '\n'.join(output.traceback)
        return ['::', '', indent(remove_ansi(traceback_text)), '']

    @DocInherit
    def _img_lines(self, img_file):
        return ['.. image:: %s' % img_file, '']

    @DocInherit
    def render_display_format_text(self, output):
        return rst_directive('.. parsed-literal::', output.text)

    @DocInherit
    def _unknown_lines(self, data):
        warning = rst_directive('.. warning:: Unknown cell')
        warning.append(data)
        return warning

    def render_display_format_html(self, output):
        """Render the html part of an output as a raw html directive.

        Returns list.
        """
        return rst_directive('.. raw:: html', output.html)

    def render_display_format_latex(self, output):
        """Render the latex part of an output as a math directive.

        Returns list.
        """
        return rst_directive('.. math::', output.latex)

    def render_display_format_json(self, output):
        """Render the json part of an output as a raw json directive.

        Returns list.
        """
        return rst_directive('.. raw:: json', output.json)

    def render_display_format_javascript(self, output):
        """Render the javascript part of an output as a raw directive.

        Returns list.
        """
        return rst_directive('.. raw:: javascript', output.javascript)
460 460
461 461
462 462
463 463
def highlight(src, lang='python'):
    """Return `src` syntax-highlighted as HTML by pygments.

    `lang` is the pygments lexer name; the lexer is created with
    ``stripall=True``.
    """
    # pygments is imported lazily, so it is only needed when this is called.
    from pygments import highlight as pygments_highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import get_lexer_by_name

    return pygments_highlight(src,
                              get_lexer_by_name(lang, stripall=True),
                              HtmlFormatter())
473 473
474 474
class ConverterMarkdown(Converter):
    """Converter that renders a notebook as Markdown.

    Parameters
    ----------
    infile : string
      Path of the notebook to convert.
    highlight_source : bool
      Pass code cell input through `highlight` instead of indenting it.
    show_prompts : bool
      Emit ``In[N]`` / ``Out[N]`` prompts for code cells.
    inline_prompt : bool
      With `show_prompts`, put the input prompt on the first source line
      instead of on a line of its own (no output prompt is emitted then).
    """
    extension = 'md'

    def __init__(self, infile, highlight_source=True, show_prompts=False,
                 inline_prompt=False):
        super(ConverterMarkdown, self).__init__(infile)
        self.highlight_source = highlight_source
        self.show_prompts = show_prompts
        self.inline_prompt = inline_prompt

    @DocInherit
    def render_heading(self, cell):
        return ['{0} {1}'.format('#'*cell.level, cell.source), '']

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []
        lines = []
        if self.show_prompts and not self.inline_prompt:
            lines.extend(['*In[%s]:*' % cell.prompt_number, ''])
        if self.show_prompts and self.inline_prompt:
            # Prefix the first source line with the prompt and indent the
            # remaining lines by the prompt width so they line up under it.
            prompt = 'In[%s]: ' % cell.prompt_number
            input_lines = cell.input.split('\n')
            src = prompt + input_lines[0] + '\n' + indent('\n'.join(input_lines[1:]), nspaces=len(prompt))
        else:
            src = cell.input
        src = highlight(src) if self.highlight_source else indent(src)
        lines.extend([src, ''])
        if cell.outputs and self.show_prompts and not self.inline_prompt:
            lines.extend(['*Out[%s]:*' % cell.prompt_number, ''])
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        lines.append('')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [cell.source, '']

    @DocInherit
    def render_raw(self, cell):
        if self.raw_as_verbatim:
            return [indent(cell.source), '']
        else:
            return [cell.source, '']

    @DocInherit
    def render_pyout(self, output):
        lines = []

        # output is a dictionary like object with type as a key; only the
        # text representation is rendered (latex is ignored here).
        if 'text' in output:
            lines.extend(['<pre>', indent(output.text), '</pre>'])

        lines.append('')
        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        return [indent(remove_ansi('\n'.join(output.traceback))), '']

    @DocInherit
    def _img_lines(self, img_file):
        return ['', '![](%s)' % img_file, '']

    @DocInherit
    def render_display_format_text(self, output):
        return [indent(output.text)]

    @DocInherit
    def _unknown_lines(self, data):
        return ['Warning: Unknown cell', data]

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        return [output.html]

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        return ['LaTeX::', indent(output.latex)]

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        return ['JSON:', indent(output.json)]

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        return ['JavaScript:', indent(output.javascript)]
579 587
580 588
def return_list(x):
    """Return `x` itself when it is a list, otherwise a one-element list [x]."""
    if isinstance(x, list):
        return x
    return [x]
584 592
585 593
class ConverterQuickHTML(Converter):
    """Converter that renders a notebook directly as simple HTML.

    Code cells become tables with one row for the input and one row per
    output; markdown cells are wrapped in <pre> without being rendered.
    """
    extension = 'html'

    def in_tag(self, tag, src):
        """Return a list of elements bracketed by the given tag"""
        return ['<%s>' % tag, src, '</%s>' % tag]

    def optional_header(self):
        """Return the opening lines of the HTML document."""
        # XXX: inject the IPython standard CSS into here
        s = """<html>
<head>
</head>

<body>
"""
        return s.splitlines()

    def optional_footer(self):
        """Return the closing lines of the HTML document."""
        s = """</body>
</html>
"""
        return s.splitlines()

    @DocInherit
    def render_heading(self, cell):
        marker = cell.level
        return ['<h{1}>\n {0}\n</h{1}>'.format(cell.source, marker)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        lines = ['<table>']
        lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>' % cell.prompt_number)
        lines.append("<br>\n".join(cell.input.splitlines()))
        lines.append('</tt></td></tr>')

        for output in cell.outputs:
            lines.append('<tr><td></td><td>')
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))
            lines.append('</td></tr>')

        lines.append('</table>')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return self.in_tag('pre', cell.source)

    @DocInherit
    def render_raw(self, cell):
        if self.raw_as_verbatim:
            return self.in_tag('pre', cell.source)
        else:
            return [cell.source]

    @DocInherit
    def render_pyout(self, output):
        lines = ['<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>' %
                 output.prompt_number, '<td>']

        # output is a dictionary like object with type as a key
        for out_type in ('text', 'latex'):
            if out_type in output:
                lines.extend(self.in_tag('pre', indent(output[out_type])))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        return self.in_tag('pre', remove_ansi('\n'.join(output.traceback)))

    @DocInherit
    def _img_lines(self, img_file):
        return ['<img src="%s">' % img_file, '']

    @DocInherit
    def _unknown_lines(self, data):
        return ['<h2>Warning:: Unknown cell</h2>'] + self.in_tag('pre', data)

    # The three renderers below previously called return_list() without
    # returning its value, so callers received None instead of a list.

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        return return_list(output.text)

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        return return_list(output.html)

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns [].
        """
        # quickhtml ignores latex
        return []

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns [].
        """
        # quickhtml ignores json
        return []

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        return return_list(output.javascript)
711 719
712 720
class ConverterLaTeX(Converter):
    """Converts a notebook to a .tex file suitable for pdflatex.

    Note: this converter *needs*:

    - `pandoc`: for all conversion of markdown cells.  If your notebook only
      has Raw cells, pandoc will not be needed.

    - `inkscape`: if your notebook has SVG figures.  These need to be
      converted to PDF before inclusion in the TeX file, as LaTeX doesn't
      understand SVG natively.

    You will in general obtain much better final PDF results if you configure
    the matplotlib backend to create SVG output with

    %config InlineBackend.figure_format = 'svg'

    (or set the equivalent flag at startup or in your configuration profile).
    """
    extension = 'tex'
    documentclass = 'article'
    documentclass_options = '11pt,english'
    # LaTeX sectioning command for each heading level.
    heading_map = {1: r'\section',
                   2: r'\subsection',
                   3: r'\subsubsection',
                   4: r'\paragraph',
                   5: r'\subparagraph',
                   6: r'\subparagraph'}

    def in_env(self, environment, lines):
        """Return list of environment lines for input lines

        Parameters
        ----------
        environment : string
          Name of the environment to bracket with begin/end.

        lines : string or list of strings
          Content of the environment.  A single string is added as one
          element; any other value is treated as a sequence of lines.
        """
        out = [u'\\begin{%s}' % environment]
        if isinstance(lines, basestring):
            out.append(lines)
        else:  # list
            out.extend(lines)
        out.append(u'\\end{%s}' % environment)
        return out

    def convert(self):
        """Return the LaTeX source for the notebook as a single string.

        With `with_preamble` off only the converted cells are returned.
        Otherwise the body is wrapped into a standalone document: header
        comments, the document class, ``preamble.tex`` from the directory of
        this script, the optional user preamble file, and the document
        environment.
        """
        # The main body is done by the logic in the parent class, and that's
        # all we need if preamble support has been turned off.
        body = super(ConverterLaTeX, self).convert()
        if not self.with_preamble:
            return body
        # But if preamble is on, then we need to construct a proper, standalone
        # tex file.

        # Tag the document at the top and set latex class
        final = [r'%% This file was auto-generated by IPython, do NOT edit',
                 r'%% Conversion from the original notebook file:',
                 r'%% {0}'.format(self.infile),
                 r'%%',
                 r'\documentclass[%s]{%s}' % (self.documentclass_options,
                                              self.documentclass),
                 '',
                 ]
        # Load our own preamble, which is stored next to the main file.  We
        # need to be careful in case the script entry point is a symlink
        myfile = __file__ if not os.path.islink(__file__) else \
            os.readlink(__file__)
        with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
            final.append(f.read())

        # Load any additional user-supplied preamble
        if self.user_preamble:
            final.extend(['', '%% Adding user preamble from file:',
                          '%% {0}'.format(self.user_preamble), ''])
            with open(self.user_preamble) as f:
                final.append(f.read())

        # Include document body
        final.extend([r'\begin{document}', '',
                      body,
                      r'\end{document}', ''])
        # Return value must be a string
        return '\n'.join(final)

    @DocInherit
    def render_heading(self, cell):
        marker = self.heading_map[cell.level]
        return ['%s{%s}' % (marker, cell.source)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        # Cell codes first carry input code, we use lstlisting for that
        lines = [u'\\begin{codecell}']

        lines.extend(self.in_env('codeinput',
                                 self.in_env('lstlisting', cell.input)))

        outlines = []
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            outlines.extend(conv_fn(output))

        # And then output of many possible types; use a frame for all of it.
        if outlines:
            lines.extend(self.in_env('codeoutput', outlines))

        lines.append(u'\\end{codecell}')

        return lines

    @DocInherit
    def _img_lines(self, img_file):
        return self.in_env('center',
                           [r'\includegraphics[width=6in]{%s}' % img_file, r'\par'])

    def _svg_lines(self, img_file):
        """Convert an SVG figure to PDF with inkscape and include the PDF.

        The PDF is written next to the SVG file, with the same base name.
        """
        base_file = os.path.splitext(img_file)[0]
        pdf_file = base_file + '.pdf'
        subprocess.check_call([inkscape, '--export-pdf=%s' % pdf_file,
                               img_file])
        return self._img_lines(pdf_file)

    @DocInherit
    def render_markdown(self, cell):
        return [markdown2latex(cell.source)]

    @DocInherit
    def render_pyout(self, output):
        lines = []

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            # The latex value may be one string or a list of lines; extending
            # with a bare string would add it one character per line.
            lines.extend(self.render_display_format_latex(output))

        if 'text' in output:
            lines.extend(self.in_env('verbatim', output.text))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        return self.in_env('traceback',
                           self.in_env('verbatim',
                                       remove_ansi('\n'.join(output.traceback))))

    @DocInherit
    def render_raw(self, cell):
        if self.raw_as_verbatim:
            return self.in_env('verbatim', cell.source)
        else:
            return [cell.source]

    @DocInherit
    def _unknown_lines(self, data):
        return [r'{\vspace{5mm}\bf WARNING:: unknown cell:}'] + \
            self.in_env('verbatim', data)

    @DocInherit
    def render_display_format_text(self, output):
        lines = []

        if 'text' in output:
            lines.extend(self.in_env('verbatim', output.text.strip()))

        return lines

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns [].
        """
        return []

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list: the value itself when it already is a list, otherwise
        a one-element list holding it.
        """
        if isinstance(output.latex, list):
            return output.latex
        return [output.latex]

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns [].
        """
        # latex ignores json
        return []

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns [].
        """
        # latex ignores javascript
        return []
918 926
class ConverterNotebook(Converter):
    """
    A converter that is essentially a null-op.
    This exists so it can be subclassed
    for custom handlers of .ipynb files
    that create new .ipynb files.

    What distinguishes this from JSONWriter is that
    subclasses can specify what to do with each type of cell.

    Writes out a notebook file.

    """
    extension = 'ipynb'

    def __init__(self, infile, outbase):
        """Set up for `infile`, writing to `outbase` + '.ipynb'.

        The figure directory handled by the base constructor is removed
        again, since this converter does not use it.
        """
        Converter.__init__(self, infile)
        self.outbase = outbase
        rmtree(self.files_dir)

    def convert(self):
        """Return the notebook as JSON text (indent 1, sorted keys).

        Cells are joined with ',' and the whole text is round-tripped through
        the json module, so it must parse as valid JSON.
        """
        return json.dumps(json.loads(Converter.convert(self, ',')), indent=1, sort_keys=True)

    def optional_header(self):
        """Return the JSON lines that open the notebook up to the cell list."""
        # json.dumps quotes and escapes the name, so backslashes or quotes in
        # outbase cannot produce invalid JSON.
        s = \
"""{
 "metadata": {
 "name": %(name)s
 },
 "nbformat": 3,
 "worksheets": [
 {
 "cells": [""" % {'name': json.dumps(self.outbase)}

        return s.split('\n')

    def optional_footer(self):
        """Return the JSON lines that close the cell list and the notebook."""
        s = \
"""]
 }
 ]
}"""
        return s.split('\n')

    # NOTE(review): cell_to_lines is not defined in the visible part of this
    # file; it is presumably a module-level helper defined further down.

    @DocInherit
    def render_heading(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_code(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_markdown(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_raw(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_pyout(self, output):
        # Was cell_to_lines(cell): `cell` is not defined in this method.
        return cell_to_lines(output)

    @DocInherit
    def render_pyerr(self, output):
        # Was cell_to_lines(cell): `cell` is not defined in this method.
        return cell_to_lines(output)

    @DocInherit
    def render_display_format_text(self, output):
        return [output.text]

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        return [output.html]

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        return [output.latex]

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        return [output.json]

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        return [output.javascript]
1019 1027
1020 1028 #-----------------------------------------------------------------------------
1021 1029 # Standalone conversion functions
1022 1030 #-----------------------------------------------------------------------------
1023 1031
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : str
        Path of the rst file to convert.

    Returns
    -------
    str
        Name of the html file written: `infile` with its extension replaced
        by '.html'.

    Raises
    ------
    IOError
        If rst2html wrote anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste into
    # the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is passed as an argument list (no shell), like md2html
    # does, so that file names containing spaces or shell metacharacters
    # reach rst2html as one literal argument.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           infile]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find the
    # closing body tag.  This may only work if there's a real title defined
    # so we get a 'div class' tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        # The same iterator resumes right after the '<body>' line.
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
1071 1079
1072 1080
def md2html(infile):
    """Convert a markdown file to simplified html suitable for blogger.

    Runs the external ``markdown`` command on `infile`, wraps its output in
    a minimal HTML page that embeds the pygments '.highlight' style
    definitions, and writes the page next to `infile` with a '.html'
    extension.  Returns the name of the file written; raises IOError if
    ``markdown`` wrote anything to stderr.
    """

    md_proc = subprocess.Popen(['markdown', infile],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    html, stderr = md_proc.communicate()
    if stderr:
        raise IOError(stderr)

    from pygments.formatters import HtmlFormatter
    css = HtmlFormatter().get_style_defs('.highlight')

    template = """
<!DOCTYPE HTML>
<html>

<head>
<title>{infile}</title>

<style type="text/css">
{css}
</style>

</head>

<body>
{html}
</body>

</html>
"""
    # Only these three fields appear in the template, so name them
    # explicitly.
    page = template.format(infile=infile, css=css, html=html)

    base, _ext = os.path.splitext(infile)
    newfname = base + '.html'
    with open(newfname, 'w') as out:
        out.write(page)

    return newfname
1113 1121
1114 1122 #-----------------------------------------------------------------------------
1115 1123 # Cell-level functions -- similar to IPython.nbformat.v3.rwbase functions
1116 1124 # but at cell level instead of whole notebook level
1117 1125 #-----------------------------------------------------------------------------
1118 1126
def writes_cell(cell, **kwargs):
    """Serialize a single cell to a JSON string.

    Any extra keyword arguments are forwarded to ``json.dumps``, except that
    ``cls``, ``indent``, ``sort_keys`` and ``separators`` are always
    overridden here.  The ``split_lines`` keyword (default True) is consumed
    by this function: when true, a deep copy of the cell is run through
    ``split_lines_cell`` before dumping, so the caller's cell is not
    modified by this step.
    """
    kwargs.update(cls=BytesEncoder,
                  indent=3,
                  sort_keys=True,
                  separators=(',', ': '))
    do_split = kwargs.pop('split_lines', True)
    if do_split:
        cell = split_lines_cell(copy.deepcopy(cell))
    dumped = json.dumps(cell, **kwargs)
    return py3compat.str_to_unicode(dumped, 'utf-8')
1127 1135
1128 1136
1129 1137 _multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
1130 1138
1131 1139
def split_lines_cell(cell):
    """
    Split lines within a cell as in
    IPython.nbformat.v3.rwbase.split_lines

    String fields are replaced, in place, by lists of lines: ``input`` and
    every ``_multiline_outputs`` key of each output for code cells,
    ``source`` and ``rendered`` for any other cell type.  Fields that are
    not strings are left alone.  The same (mutated) cell is returned.
    """
    def _to_lines(text):
        # A newline is appended before splitting, so '' yields [''] rather
        # than [] and text ending in a newline keeps a trailing '' entry.
        return (text + '\n').splitlines()

    if cell.cell_type != 'code':
        # text, heading cell
        for field in ['source', 'rendered']:
            value = cell.get(field, None)
            if isinstance(value, basestring):
                cell[field] = _to_lines(value)
        return cell

    if 'input' in cell and isinstance(cell.input, basestring):
        cell.input = _to_lines(cell.input)
    for output in cell.outputs:
        for field in _multiline_outputs:
            value = output.get(field, None)
            if isinstance(value, basestring):
                output[field] = _to_lines(value)
    return cell
1152 1160
1153 1161
def cell_to_lines(cell):
    '''
    Write a cell to json, returning the split lines.

    Note that the cell passed in is itself run through split_lines_cell
    first, so its string fields are converted to lists of lines in place.
    '''
    split_lines_cell(cell)
    serialized = writes_cell(cell)
    # Drop surrounding whitespace, then return one list entry per JSON line.
    return serialized.strip().split('\n')
1161 1169
1162 1170
# Human-readable list of the formats main() accepts; shown in the --format
# help text and in the error raised for an unknown format.
known_formats = ", ".join([
    "rst (default)",
    "html",
    "quick-html",
    "latex",
    "markdown",
])
1164 1172
def main(infile, format='rst'):
    """Convert the notebook `infile` to the requested output `format`.

    `format` must be one of 'rst', 'markdown', 'html', 'quick-html' or
    'latex'; anything else raises SystemExit with a message listing
    `known_formats`.  Nothing is returned.
    """
    # XXX: this is just quick and dirty for now. When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all else, as well as in the help
    if format == 'html':
        # Currently, conversion to html is a 2 step process, nb->md->html
        md_converter = ConverterMarkdown(infile, True)
        md2html(md_converter.render())
        return

    if format == 'rst':
        converter = ConverterRST(infile)
    elif format == 'markdown':
        converter = ConverterMarkdown(infile)
    elif format == 'quick-html':
        converter = ConverterQuickHTML(infile)
    elif format == 'latex':
        converter = ConverterLaTeX(infile)
    else:
        raise SystemExit("Unknown format '%s', " % format +
                         "known formats are: " + known_formats)

    # Every remaining format is a single render() call on its converter.
    converter.render()
1190 1198
1191 1199 #-----------------------------------------------------------------------------
1192 1200 # Script main
1193 1201 #-----------------------------------------------------------------------------
1194 1202
if __name__ == '__main__':
    # The module docstring doubles as the command-line description; the raw
    # formatter is used so help text is shown as written.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    # Require a filename as a positional argument.  nargs=1 stores it as a
    # one-element list, hence the [0] below.
    parser.add_argument('infile', nargs=1)
    parser.add_argument(
        '-f', '--format',
        default='rst',
        help='Output format. Supported formats: \n' + known_formats,
    )
    args = parser.parse_args()
    main(infile=args.infile[0], format=args.format)
General Comments 0
You need to be logged in to leave comments. Login now