base.py
432 lines
| 14.2 KiB
| text/x-python
|
PythonLexer
/ converters / base.py
David Warde-Farley
|
r8789 | """Base classes for the notebook conversion pipeline. | ||
This module defines Converter, from which all objects designed to implement | ||||
a conversion of IPython notebooks to some other format should inherit. | ||||
""" | ||||
#----------------------------------------------------------------------------- | ||||
# Copyright (c) 2012, the IPython Development Team. | ||||
# | ||||
# Distributed under the terms of the Modified BSD License. | ||||
# | ||||
# The full license is in the file COPYING.txt, distributed with this software. | ||||
#----------------------------------------------------------------------------- | ||||
#----------------------------------------------------------------------------- | ||||
# Imports | ||||
#----------------------------------------------------------------------------- | ||||
Matthias BUSSONNIER
|
r8627 | from __future__ import print_function, absolute_import | ||
Matthias BUSSONNIER
|
r8618 | |||
David Warde-Farley
|
r8789 | # Stdlib imports | ||
Matthias BUSSONNIER
|
r8618 | import codecs | ||
import io | ||||
import logging | ||||
import os | ||||
import pprint | ||||
Rick Lupton
|
r8751 | import re | ||
Matthias BUSSONNIER
|
r8618 | from types import FunctionType | ||
David Warde-Farley
|
r8789 | # IPython imports | ||
Matthias BUSSONNIER
|
r8618 | from IPython.nbformat import current as nbformat | ||
Matthias BUSSONNIER
|
r9569 | from IPython.config.configurable import Configurable, SingletonConfigurable | ||
Matthias BUSSONNIER
|
r9571 | from IPython.utils.traitlets import (List, Unicode, Type, Bool, Dict, CaselessStrEnum, | ||
Any) | ||||
Matthias BUSSONNIER
|
r8618 | |||
David Warde-Farley
|
r8789 | # Our own imports | ||
Anthony Scopatz
|
r8933 | from .utils import remove_fake_files_url | ||
David Warde-Farley
|
r8789 | |||
#----------------------------------------------------------------------------- | ||||
# Local utilities | ||||
#----------------------------------------------------------------------------- | ||||
Matthias BUSSONNIER
|
r8618 | |||
Rick Lupton
|
def clean_filename(filename):
    """
    Replace every non-alphanumeric character of a filename by an underscore.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        The input with each character other than ASCII letters, digits
        and underscores replaced by a single underscore.
    """
    forbidden = re.compile(r'[^a-zA-Z0-9_]')
    return forbidden.sub('_', filename)
David Warde-Farley
|
r8789 | |||
Matthias BUSSONNIER
|
r8618 | #----------------------------------------------------------------------------- | ||
# Class declarations | ||||
#----------------------------------------------------------------------------- | ||||
class ConversionException(Exception):
    """Exception type declared by the notebook conversion module."""
David Warde-Farley
|
r8747 | |||
Matthias BUSSONNIER
|
class DocStringInheritor(type):
    """
    Metaclass that lets undocumented methods inherit a base class docstring.

    For every plain function in the class body that has no docstring of
    its own, the direct bases are walked in order; the first base that
    exposes an attribute of the same name with a non-empty ``__doc__``
    donates that docstring.  Methods that already carry a docstring are
    left untouched.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        """Build the class after filling in missing method docstrings.

        Parameters
        ----------
        classname : str
            Name of the class being created.
        bases : tuple
            Direct base classes of the class being created.
        classDict : dict
            Namespace of the class body.

        Returns
        -------
        The newly created class.
        """
        newClassDict = {}
        for attributeName, attribute in classDict.items():
            # Only plain functions are considered, and only when they do not
            # already document themselves -- never clobber an explicit
            # docstring written on the derived method.
            if isinstance(attribute, FunctionType) and not attribute.__doc__:
                # look through bases for matching function by name
                for baseclass in bases:
                    if not hasattr(baseclass, attributeName):
                        continue
                    inherited_doc = getattr(baseclass, attributeName).__doc__
                    if inherited_doc:
                        attribute.__doc__ = inherited_doc
                        break
            newClassDict[attributeName] = attribute
        return type.__new__(meta, classname, bases, newClassDict)
David Warde-Farley
|
r8747 | |||
Matthias BUSSONNIER
|
class Converter(Configurable):
    """Base class for converting a notebook to a text-based format.

    ``convert`` assembles the result from ``optional_header``, ``main_body``
    and ``optional_footer``.  ``main_body`` looks up a ``render_<cell_type>``
    method for each cell via ``dispatch``.  Most ``render_*`` hooks, as well
    as ``_img_lines`` and ``_unknown_lines``, raise ``NotImplementedError``
    in this class and have to be provided by subclasses.
    """
    #__metaclass__ = DocStringInheritor

    #-------------------------------------------------------------------------
    # Class-level attributes determining the behaviour of the class but
    # probably not varying from instance to instance.
    #-------------------------------------------------------------------------
    default_encoding = 'utf-8'
    extension = str()
    blank_symbol = " "
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']

    #-------------------------------------------------------------------------
    # Instance-level attributes that are set in the constructor for this
    # class.
    #-------------------------------------------------------------------------
    infile = Any()

    highlight_source = Bool(True,
                            config=True,
                            help="Enable syntax highlighting for code blocks.")

    preamble = Unicode("",
                       config=True,
                       help="Path to a user-specified preamble file")

    extract_figures = Bool(True,
                           config=True,
                           help="extract base-64 encoded figures of the notebook into separate files,\n"
                                "replace by link to corresponding file in source.")

    infile_dir = Unicode()
    infile_root = Unicode()
    clean_name = Unicode()
    files_dir = Unicode()
    outbase = Unicode()

    #-------------------------------------------------------------------------
    # Instance-level attributes that are set by other methods in the base
    # class.
    #-------------------------------------------------------------------------
    figures_counter = 0
    output = Unicode()

    #-------------------------------------------------------------------------
    # Instance-level attributes that are not actually mentioned further
    # in this class. TODO: Could they be usefully moved to a subclass?
    #-------------------------------------------------------------------------
    with_preamble = Bool(True, config=True)
    user_preamble = None
    raw_as_verbatim = False

    def __init__(self, infile=None, config=None, exclude=None, **kw):
        """Set up the converter and, if an input file is given, its paths.

        Parameters
        ----------
        infile : str, optional
            Path of the notebook to convert.  When given, the derived path
            attributes (``infile_dir``, ``infile_root``, ``clean_name``,
            ``files_dir``, ``outbase``) are filled in and the
            ``<clean_name>_files`` directory is created next to the
            notebook if it does not exist yet.
        config : optional
            Passed through to the ``Configurable`` constructor.
        exclude : list, optional
            Stored as ``self.exclude_cells``.  Defaults to a fresh empty
            list for every instance.
        **kw
            Accepted and ignored by this class.
        """
        super(Converter, self).__init__(config=config)
        #DocStringInheritor.__init__(self=config)
        # N.B. Initialized in the same order as defined above. Please try to
        # keep in this way for readability's sake.

        # A ``[]`` default in the signature would be shared by every
        # instance created without ``exclude``; build a new list instead.
        self.exclude_cells = [] if exclude is None else exclude
        self.infile = infile
        if infile:
            self.infile_dir, infile_root = os.path.split(infile)
            self.infile_root = os.path.splitext(infile_root)[0]
            self.clean_name = clean_filename(self.infile_root)
            # Handle the creation of a directory for ancillary files, for
            # formats that need one.
            files_dir = os.path.join(self.infile_dir,
                                     self.clean_name + '_files')
            if not os.path.isdir(files_dir):
                os.mkdir(files_dir)
            self.files_dir = files_dir
            self.outbase = os.path.join(self.infile_dir, self.infile_root)

    def __del__(self):
        """Remove the ancillary files directory if it exists and is empty."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def _get_prompt_number(self, cell):
        """Return ``cell.prompt_number``, or ``blank_symbol`` if it has none."""
        if hasattr(cell, 'prompt_number'):
            return cell.prompt_number
        return self.blank_symbol

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``render_unknown`` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """
        return output_type dependent render method, for example
        render_display_format_text

        Falls back to ``render_unknown_display`` when no
        ``render_display_format_<format>`` attribute exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert the notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        Converter class do not need to re-implement this method, but just
        need implementation for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
            Character or string to join cells with. Default is "\\n"

        Returns
        -------
        out : string
            Header, body and footer lines joined with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Render every cell of every worksheet of ``self.nb``.

        Parameters
        ----------
        cell_separator : string
            String placed between the rendered cells.

        Returns
        -------
        list
            The rendered cells, joined with ``cell_separator`` and split
            again on newlines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                #print(cell.cell_type)  # dbg
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        return cell_separator.join(converted_cells).split('\n')

    def render(self):
        "read, convert, and save self.infile"
        # Only read the notebook if no one has attached one already.
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        assert isinstance(self.output, unicode)
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to a file and return its absolute path.

        Parameters
        ----------
        outfile : str, optional
            Destination path.  Defaults to ``self.outbase`` followed by a
            dot and ``self.extension``.
        encoding : str, optional
            Encoding used for writing.  Defaults to
            ``self.default_encoding``.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is written into ``self.files_dir`` and named after
        ``self.clean_name`` and the running ``figures_counter``, which is
        incremented on every call.

        Returns the path of the written file, i.e. ``self.files_dir``
        joined with the figure name.
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = data.decode('base64')
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        The format is the first entry of ``display_data_priority`` present
        in ``output``; failing that, the first key of ``output`` other than
        ``'output_type'``.

        Returns list.

        Raises
        ------
        RuntimeError
            If ``output`` holds no key besides ``'output_type'``.
        """
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            # None of the preferred formats is available: take whatever
            # other format the output offers.
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ['png', 'svg', 'jpg', 'pdf'] and self.extract_figures:
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Logs a warning and hands the pretty-printed cell to
        ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        Logs a warning and hands the pretty-printed output to
        ``_unknown_lines``.  ``type`` is not used; it is optional because
        ``render_display_data`` calls the method returned by
        ``dispatch_display_format`` with the output as its only argument.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError