#!/usr/bin/env python """Convert IPython notebooks to other formats, such as ReST, and HTML. Example: ./nbconvert.py --format html file.ipynb Produces 'file.rst' and 'file.html', along with auto-generated figure files called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html, use '--format quick-html' which will do ipynb -> html, but won't look as pretty. """ #----------------------------------------------------------------------------- # Imports #----------------------------------------------------------------------------- from __future__ import print_function # Stdlib import codecs import logging import os import pprint import re import subprocess import sys import json import copy from shutil import rmtree inkscape = 'inkscape' if sys.platform == 'darwin': inkscape = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape' if not os.path.exists(inkscape): inkscape = None # From IPython from IPython.external import argparse from IPython.nbformat import current as nbformat from IPython.utils.text import indent from decorators import DocInherit from IPython.nbformat.v3.nbjson import BytesEncoder from IPython.utils import py3compat #----------------------------------------------------------------------------- # Utility functions #----------------------------------------------------------------------------- def DocInherit(f): return f def remove_fake_files_url(cell): """Remove from the cell source the /files/ pseudo-path we use. """ src = cell.source cell.source = src.replace('/files/', '') def remove_ansi(src): """Strip all ANSI color escape sequences from input string. Parameters ---------- src : string Returns ------- string """ return re.sub(r'\033\[(0|\d;\d\d)m', '', src) # Pandoc-dependent code def markdown2latex(src): """Convert a markdown string to LaTeX via pandoc. This function will raise an error if pandoc is not installed. Any error messages generated by pandoc are printed to stderr. Parameters ---------- src : string Input string, assumed to be valid markdown. Returns ------- out : string Output as returned by pandoc. """ p = subprocess.Popen('pandoc -f markdown -t latex'.split(), stdin=subprocess.PIPE, stdout=subprocess.PIPE) out, err = p.communicate(src.encode('utf-8')) if err: print(err, file=sys.stderr) #print('*'*20+'\n', out, '\n'+'*'*20) # dbg return unicode(out,'utf-8') def markdown2rst(src): """Convert a markdown string to LaTeX via pandoc. This function will raise an error if pandoc is not installed. Any error messages generated by pandoc are printed to stderr. Parameters ---------- src : string Input string, assumed to be valid markdown. Returns ------- out : string Output as returned by pandoc. """ p = subprocess.Popen('pandoc -f markdown -t rst'.split(), stdin=subprocess.PIPE, stdout=subprocess.PIPE) out, err = p.communicate(src.encode('utf-8')) if err: print(err, file=sys.stderr) #print('*'*20+'\n', out, '\n'+'*'*20) # dbg return unicode(out,'utf-8') def rst_directive(directive, text=''): out = [directive, ''] if text: out.extend([indent(text), '']) return out #----------------------------------------------------------------------------- # Class declarations #----------------------------------------------------------------------------- class ConversionException(Exception): pass class Converter(object): default_encoding = 'utf-8' extension = str() figures_counter = 0 infile = str() infile_dir = str() infile_root = str() files_dir = str() with_preamble = True user_preamble = None output = str() raw_as_verbatim = False def __init__(self, infile): self.infile = infile self.infile_dir, infile_root = os.path.split(infile) infile_root = os.path.splitext(infile_root)[0] files_dir = os.path.join(self.infile_dir, infile_root + '_files') if not os.path.isdir(files_dir): os.mkdir(files_dir) self.infile_root = infile_root self.files_dir = files_dir self.outbase = os.path.join(self.infile_dir, infile_root) def dispatch(self, cell_type): """return cell_type dependent render method, for example render_code """ return getattr(self, 'render_' + cell_type, self.render_unknown) def dispatch_display_format(self, format): """return output_type dependent render method, for example render_output_text """ return getattr(self, 'render_display_format_' + format, self.render_unknown) def convert(self, cell_separator='\n'): lines = [] lines.extend(self.optional_header()) converted_cells = [] for worksheet in self.nb.worksheets: for cell in worksheet.cells: #print(cell.cell_type) # dbg conv_fn = self.dispatch(cell.cell_type) if cell.cell_type in ('markdown', 'raw'): remove_fake_files_url(cell) converted_cells.append('\n'.join(conv_fn(cell))) cell_lines = cell_separator.join(converted_cells).split('\n') lines.extend(cell_lines) lines.extend(self.optional_footer()) return u'\n'.join(lines) def render(self): "read, convert, and save self.infile" if not hasattr(self, 'nb'): self.read() self.output = self.convert() return self.save() def read(self): "read and parse notebook into NotebookNode called self.nb" with open(self.infile) as f: self.nb = nbformat.read(f, 'json') def save(self, outfile=None, encoding=None): "read and parse notebook into self.nb" if outfile is None: outfile = self.outbase + '.' + self.extension if encoding is None: encoding = self.default_encoding with open(outfile, 'w') as f: f.write(self.output.encode(encoding)) return os.path.abspath(outfile) def optional_header(self): return [] def optional_footer(self): return [] def _new_figure(self, data, fmt): """Create a new figure file in the given format. Returns a path relative to the input file. """ figname = '%s_fig_%02i.%s' % (self.infile_root, self.figures_counter, fmt) self.figures_counter += 1 fullname = os.path.join(self.files_dir, figname) # Binary files are base64-encoded, SVG is already XML if fmt in ('png', 'jpg', 'pdf'): data = data.decode('base64') fopen = lambda fname: open(fname, 'wb') else: fopen = lambda fname: codecs.open(fname, 'wb', self.default_encoding) with fopen(fullname) as f: f.write(data) return fullname def render_heading(self, cell): """convert a heading cell Returns list.""" raise NotImplementedError def render_code(self, cell): """Convert a code cell Returns list.""" raise NotImplementedError def render_markdown(self, cell): """convert a markdown cell Returns list.""" raise NotImplementedError def _img_lines(self, img_file): """Return list of lines to include an image file.""" # Note: subclasses may choose to implement format-specific _FMT_lines # methods if they so choose (FMT in {png, svg, jpg, pdf}). raise NotImplementedError def render_display_data(self, output): """convert display data from the output of a code cell Returns list. """ lines = [] for fmt in output.keys(): if fmt in ['png', 'svg', 'jpg', 'pdf']: img_file = self._new_figure(output[fmt], fmt) # Subclasses can have format-specific render functions (e.g., # latex has to auto-convert all SVG to PDF first). lines_fun = getattr(self, '_%s_lines' % fmt, None) if not lines_fun: lines_fun = self._img_lines lines.extend(lines_fun(img_file)) elif fmt != 'output_type': conv_fn = self.dispatch_display_format(fmt) lines.extend(conv_fn(output)) return lines def render_raw(self, cell): """convert a cell with raw text Returns list.""" raise NotImplementedError def render_unknown(self, cell): """Render cells of unkown type Returns list.""" data = pprint.pformat(cell) logging.warning('Unknown cell:\n%s' % data) return self._unknown_lines(data) def render_stream(self, output): """render the stream part of an output Returns list. Identical to render_display_format_text """ return self.render_display_format_text(output) def render_pyout(self, output): """convert pyout part of a code cell Returns list.""" raise NotImplementedError def render_pyerr(self, output): """convert pyerr part of a code cell Returns list.""" raise NotImplementedError def _unknown_lines(self, data): """Return list of lines for an unknown cell. Parameters ---------- data : str The content of the unknown data as a single string. """ raise NotImplementedError # These are the possible format types in an output node def render_display_format_text(self, output): """render the text part of an output Returns list. """ raise NotImplementedError def render_display_format_html(self, output): """render the html part of an output Returns list. """ raise NotImplementedError def render_display_format_latex(self, output): """render the latex part of an output Returns list. """ raise NotImplementedError def render_display_format_json(self, output): """render the json part of an output Returns list. """ raise NotImplementedError def render_display_format_javascript(self, output): """render the javascript part of an output Returns list. """ raise NotImplementedError class ConverterRST(Converter): extension = 'rst' heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'} @DocInherit def render_heading(self, cell): marker = self.heading_level[cell.level] return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))] @DocInherit def render_code(self, cell): if not cell.input: return [] lines = ['In[%s]:' % cell.prompt_number, ''] lines.extend(rst_directive('.. code:: python', cell.input)) for output in cell.outputs: conv_fn = self.dispatch(output.output_type) lines.extend(conv_fn(output)) return lines @DocInherit def render_markdown(self, cell): #return [cell.source] return [markdown2rst(cell.source)] @DocInherit def render_raw(self, cell): if self.raw_as_verbatim: return ['::', '', indent(cell.source), ''] else: return [cell.source] @DocInherit def render_pyout(self, output): lines = ['Out[%s]:' % output.prompt_number, ''] # output is a dictionary like object with type as a key if 'latex' in output: lines.extend(rst_directive('.. math::', output.latex)) if 'text' in output: lines.extend(rst_directive('.. parsed-literal::', output.text)) return lines @DocInherit def render_pyerr(self, output): # Note: a traceback is a *list* of frames. return ['::', '', indent(remove_ansi('\n'.join(output.traceback))), ''] @DocInherit def _img_lines(self, img_file): return ['.. image:: %s' % img_file, ''] @DocInherit def render_display_format_text(self, output): return rst_directive('.. parsed-literal::', output.text) @DocInherit def _unknown_lines(self, data): return rst_directive('.. warning:: Unknown cell') + [data] def render_display_format_html(self, output): """render the html part of an output Returns list. """ return rst_directive('.. raw:: html', output.html) def render_display_format_latex(self, output): """render the latex part of an output Returns list. """ return rst_directive('.. math::', output.latex) def render_display_format_json(self, output): """render the json part of an output Returns list. """ return rst_directive('.. raw:: json', output.json) def render_display_format_javascript(self, output): """render the javascript part of an output Returns list. """ return rst_directive('.. raw:: javascript', output.javascript) class ConverterMarkdown(Converter): extension = 'md' @DocInherit def render_heading(self, cell): return ['{0} {1}'.format('#'*cell.level, cell.source), ''] @DocInherit def render_code(self, cell): if not cell.input: return [] lines = [] #lines.append('----') lines.extend(['*In[%s]:*' % cell.prompt_number, '']) lines.extend([indent(cell.input), '']) if cell.outputs: lines.extend(['==>', '']) for output in cell.outputs: conv_fn = self.dispatch(output.output_type) lines.extend(conv_fn(output)) #lines.append('----') lines.append('') return lines @DocInherit def render_markdown(self, cell): return [cell.source, ''] #return [markdown2rst(cell.source)] @DocInherit def render_raw(self, cell): if self.raw_as_verbatim: return [indent(cell.source), ''] else: return [cell.source, ''] @DocInherit def render_pyout(self, output): lines = [] #lines.extend(['*Out[%s]:*' % output.prompt_number, '']) # output is a dictionary like object with type as a key if 'latex' in output: pass if 'text' in output: lines.extend([indent(output.text)]) lines.append('') return lines @DocInherit def render_pyerr(self, output): # Note: a traceback is a *list* of frames. return [indent(remove_ansi('\n'.join(output.traceback))), ''] @DocInherit def _img_lines(self, img_file): return ['', '![image](%s)' % img_file, ''] @DocInherit def render_display_format_text(self, output): return [indent(output.text)] @DocInherit def _unknown_lines(self, data): return ['Warning: Unknown cell', data] def render_display_format_html(self, output): """render the html part of an output Returns list. """ return [output.html] def render_display_format_latex(self, output): """render the latex part of an output Returns list. """ return ['LaTeX::', indent(output.latex)] def render_display_format_json(self, output): """render the json part of an output Returns list. """ return ['JSON:', indent(output.json)] def render_display_format_javascript(self, output): """render the javascript part of an output Returns list. """ return ['JavaScript:', indent(output.javascript)] class ConverterQuickHTML(Converter): extension = 'html' def in_tag(self, tag, src): """Return a list of elements bracketed by the given tag""" return ['<%s>' % tag, src, '%s>' % tag] def optional_header(self): # XXX: inject the IPython standard CSS into here s = """
""" return s.splitlines() def optional_footer(self): s = """ """ return s.splitlines() @DocInherit def render_heading(self, cell): marker = cell.level return ['In [%s]: | ' % cell.prompt_number)
lines.append(" \n".join(cell.input.splitlines())) lines.append(' |
') conv_fn = self.dispatch(output.output_type) lines.extend(conv_fn(output)) lines.append(' |