|
|
"""Base classes for the notebook conversion pipeline.
|
|
|
|
|
|
This module defines Converter, from which all objects designed to implement
|
|
|
a conversion of IPython notebooks to some other format should inherit.
|
|
|
"""
|
|
|
#-----------------------------------------------------------------------------
|
|
|
# Copyright (c) 2012, the IPython Development Team.
|
|
|
#
|
|
|
# Distributed under the terms of the Modified BSD License.
|
|
|
#
|
|
|
# The full license is in the file COPYING.txt, distributed with this software.
|
|
|
#-----------------------------------------------------------------------------
|
|
|
|
|
|
#-----------------------------------------------------------------------------
|
|
|
# Imports
|
|
|
#-----------------------------------------------------------------------------
|
|
|
|
|
|
from __future__ import print_function, absolute_import
|
|
|
|
|
|
# Stdlib imports
|
|
|
import codecs
|
|
|
import io
|
|
|
import logging
|
|
|
import os
|
|
|
import pprint
|
|
|
import re
|
|
|
from types import FunctionType
|
|
|
|
|
|
# IPython imports
|
|
|
from IPython.nbformat import current as nbformat
|
|
|
from IPython.config.configurable import Configurable, SingletonConfigurable
|
|
|
from IPython.utils.traitlets import List, Unicode, Type, Bool, Dict, CaselessStrEnum
|
|
|
|
|
|
# Our own imports
|
|
|
from .utils import remove_fake_files_url
|
|
|
|
|
|
|
|
|
#-----------------------------------------------------------------------------
|
|
|
# Local utilities
|
|
|
#-----------------------------------------------------------------------------
|
|
|
|
|
|
def clean_filename(filename):
    """
    Sanitize a filename by substituting underscores for unsafe characters.

    Every character other than an ASCII letter, an ASCII digit or an
    underscore is replaced by a single underscore, so the result has the
    same length as the input.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        The sanitized filename, made up only of ASCII letters, digits
        and underscores.
    """
    unsafe_chars = re.compile(r'[^a-zA-Z0-9_]')
    return unsafe_chars.sub('_', filename)
|
|
|
|
|
|
|
|
|
#-----------------------------------------------------------------------------
|
|
|
# Class declarations
|
|
|
#-----------------------------------------------------------------------------
|
|
|
|
|
|
class ConversionException(Exception):
    """Exception type for errors specific to notebook conversion.

    Adds nothing to ``Exception``; it only provides a distinct type that
    callers can catch.
    """
|
|
|
|
|
|
|
|
|
class DocStringInheritor(type):
    """
    Metaclass that lets methods inherit missing docstrings from base classes.

    For every plain function in the class body that has no docstring of its
    own, the direct bases are searched in order for an attribute of the same
    name.  The first such attribute that carries a non-empty docstring
    donates it to the new function.  Functions that already have a docstring
    are left untouched, so an explicit docstring in a subclass always wins.

    Based on a recipe by Paul McGuire:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    written following Michael Foord's metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml).
    """
    def __new__(meta, classname, bases, classDict):
        newClassDict = {}
        for attributeName, attribute in classDict.items():
            # Only plain functions that lack their own docstring are
            # candidates; never clobber documentation the subclass wrote.
            if isinstance(attribute, FunctionType) and not attribute.__doc__:
                # look through bases for matching function by name
                for baseclass in bases:
                    if hasattr(baseclass, attributeName):
                        basefn = getattr(baseclass, attributeName)
                        if basefn.__doc__:
                            attribute.__doc__ = basefn.__doc__
                            break
            newClassDict[attributeName] = attribute
        return type.__new__(meta, classname, bases, newClassDict)
|
|
|
|
|
|
|
|
|
|
|
|
class Converter(Configurable):
    """Base class for objects that convert a notebook to another format.

    ``convert`` builds the output by dispatching every cell to a
    ``render_<cell_type>`` method and joining the returned lines.  Most of
    the ``render_*`` methods defined here raise ``NotImplementedError`` and
    are meant to be provided by subclasses.
    """
    #__metaclass__ = DocStringInheritor

    #-------------------------------------------------------------------------
    # Class-level attributes determining the behaviour of the class but
    # probably not varying from instance to instance.
    #-------------------------------------------------------------------------
    default_encoding = 'utf-8'
    extension = str()
    blank_symbol = " "
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']

    #-------------------------------------------------------------------------
    # Instance-level attributes that are set in the constructor for this
    # class.
    #-------------------------------------------------------------------------
    infile = Unicode()

    highlight_source = Bool(True,
                            config=True,
                            help="Enable syntax highlighting for code blocks.")

    preamble = Unicode("",
                       config=True,
                       help="Path to a user-specified preamble file")

    infile_dir = Unicode()
    infile_root = Unicode()
    clean_name = Unicode()
    files_dir = Unicode()
    outbase = Unicode()

    #-------------------------------------------------------------------------
    # Instance-level attributes that are set by other methods in the base
    # class.
    #-------------------------------------------------------------------------
    figures_counter = 0
    output = Unicode()

    #-------------------------------------------------------------------------
    # Instance-level attributes that are not actually mentioned further
    # in this class. TODO: Could they be usefully moved to a subclass?
    #-------------------------------------------------------------------------
    with_preamble = Bool(True, config=True)
    user_preamble = None
    raw_as_verbatim = False

    def __init__(self, infile='', config=None, exclude=None, **kw):
        """Set up paths derived from ``infile`` and create the files dir.

        Parameters
        ----------
        infile : str
            Path of the notebook file to convert.
        config : object, optional
            Configuration object handed to ``Configurable.__init__``.
        exclude : list, optional
            Stored as ``self.exclude_cells``.  Defaults to a fresh empty
            list for every instance.
        **kw
            Accepted for call compatibility; not used by this class.

        Side effects
        ------------
        Creates the directory ``<infile_dir>/<clean_name>_files`` if it does
        not exist yet.
        """
        super(Converter, self).__init__(config=config)

        # N.B. Initialized in the same order as defined above. Please try to
        # keep in this way for readability's sake.

        # A literal ``[]`` default would be one list object shared by every
        # instance created without an explicit ``exclude``.
        self.exclude_cells = [] if exclude is None else exclude
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        self.infile_root = os.path.splitext(infile_root)[0]
        self.clean_name = clean_filename(self.infile_root)
        # Handle the creation of a directory for ancillary files, for
        # formats that need one.
        files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, self.infile_root)

    def __del__(self):
        """Remove the ancillary files directory if it ended up empty."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def _get_prompt_number(self, cell):
        """Return ``cell.prompt_number``, or ``blank_symbol`` if it has none."""
        if hasattr(cell, 'prompt_number'):
            return cell.prompt_number
        return self.blank_symbol

    def dispatch(self, cell_type):
        """Return the render method for ``cell_type``, e.g. ``render_code``.

        Falls back to ``render_unknown`` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """Return the render method for a display format.

        Looks up ``render_display_format_<format>`` (for example
        ``render_display_format_text``) and falls back to
        ``render_unknown_display`` when it does not exist.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert the notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        the Converter class do not need to re-implement this method, but just
        need implementations for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
            Character or string to join cells with. Default is "\n"

        Returns
        -------
        out : string
            Header lines, body lines and footer lines joined with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Render every cell of every worksheet in ``self.nb``.

        Parameters
        ----------
        cell_separator : string
            String placed between the rendered cells.

        Returns
        -------
        cell_lines : list
            The rendered cells, joined by ``cell_separator`` and then split
            into individual lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        return cell_lines

    def render(self):
        """Read (if not already read), convert, and save ``self.infile``.

        Returns whatever ``save`` returns (the absolute output path).
        """
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        assert isinstance(self.output, unicode)
        return self.save()

    def read(self):
        """Read and parse the notebook into a node stored as ``self.nb``."""
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to a file.

        Parameters
        ----------
        outfile : str, optional
            Destination path.  Defaults to ``self.outbase`` followed by a
            dot and ``self.extension``.
        encoding : str, optional
            Text encoding of the file.  Defaults to ``self.default_encoding``.

        Returns
        -------
        path : str
            Absolute path of the file that was written.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named ``<clean_name>_fig_<NN>.<fmt>`` using the running
        ``figures_counter`` (which is then incremented) and is written
        inside ``self.files_dir``.

        Returns the path of the file that was written.
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = data.decode('base64')
            figfile = open(fullname, 'wb')
        else:
            figfile = codecs.open(fullname, 'wb', self.default_encoding)

        with figfile as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        The format is the first entry of ``display_data_priority`` present
        in ``output``; failing that, the first key of ``output`` other than
        ``'output_type'``.

        Returns list.

        Raises ``RuntimeError`` when ``output`` offers no format at all.
        """
        # Both loops rely on for/else: the ``else`` suite only runs when the
        # loop finished without hitting ``break``.
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ['png', 'svg', 'jpg', 'pdf']:
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Logs a warning and passes the pretty-printed cell to
        ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render display output of unknown format

        Logs a warning and passes the pretty-printed output to
        ``_unknown_lines``.

        ``type`` is unused.  It is optional because ``render_display_data``
        invokes this fallback with ``output`` as the only argument.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
|
|
|
|