##// END OF EJS Templates
Moved and renamed jinja template converter.
Moved and renamed jinja template converter.

File last commit:

r10386:6416b524
r10429:16ebf1a6
Show More
base.py
425 lines | 13.8 KiB | text/x-python | PythonLexer
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 """Base classes for the notebook conversion pipeline.
This module defines Converter, from which all objects designed to implement
a conversion of IPython notebooks to some other format should inherit.
"""
#-----------------------------------------------------------------------------
# Copyright (c) 2012, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
Matthias BUSSONNIER
pylinting 2
r8627 from __future__ import print_function, absolute_import
Matthias BUSSONNIER
latex working
r8618
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 # Stdlib imports
Matthias BUSSONNIER
latex working
r8618 import codecs
import io
import logging
import os
import pprint
Rick Lupton
LaTeX converter: remove problematic characters from filenames
r8751 import re
Matthias BUSSONNIER
latex working
r8618 from types import FunctionType
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 # IPython imports
Matthias BUSSONNIER
latex working
r8618 from IPython.nbformat import current as nbformat
Matthias BUSSONNIER
working config
r9569 from IPython.config.configurable import Configurable, SingletonConfigurable
Matthias BUSSONNIER
working config
r9765 from IPython.utils.traitlets import List, Unicode, Type, Bool, Dict, CaselessStrEnum
Matthias BUSSONNIER
latex working
r8618
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 # Our own imports
Anthony Scopatz
convters sub-package use relative imports
r8933 from .utils import remove_fake_files_url
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789
#-----------------------------------------------------------------------------
# Local utilities
#-----------------------------------------------------------------------------
Matthias BUSSONNIER
latex working
r8618
Rick Lupton
LaTeX converter: remove problematic characters from filenames
def clean_filename(filename):
    """
    Sanitize a filename by replacing every unsafe character.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        A copy of `filename` in which each character that is not an
        ASCII letter, an ASCII digit or an underscore has been replaced
        by a single underscore.
    """
    unsafe_chars = re.compile(r'[^a-zA-Z0-9_]')
    return unsafe_chars.sub('_', filename)
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789
Matthias BUSSONNIER
latex working
r8618 #-----------------------------------------------------------------------------
# Class declarations
#-----------------------------------------------------------------------------
class ConversionException(Exception):
    """Exception type declared for errors in the notebook conversion code."""
David Warde-Farley
PEP8-ify several files
r8747
Matthias BUSSONNIER
latex working
class DocStringInheritor(type):
    """
    Metaclass that copies docstrings from base-class methods.

    For every plain function in the namespace of the class being created,
    the direct bases are searched in order for an attribute of the same
    name.  The docstring of the first such attribute that has a non-empty
    docstring is assigned to the new function, replacing whatever
    docstring the new function had.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """

    def __new__(meta, classname, bases, classDict):
        namespace = dict(classDict)
        for name, member in namespace.items():
            if not isinstance(member, FunctionType):
                continue
            # Docstrings of same-named attributes, in base-class order.
            candidate_docs = (getattr(base, name).__doc__
                              for base in bases if hasattr(base, name))
            inherited = next((doc for doc in candidate_docs if doc), None)
            if inherited:
                member.__doc__ = inherited
        return type.__new__(meta, classname, bases, namespace)
David Warde-Farley
PEP8-ify several files
r8747
Matthias BUSSONNIER
working config
r9765
Matthias BUSSONNIER
working config
class Converter(Configurable):
    """
    Base class for notebook converters.

    `convert` assembles the output by dispatching every cell to a
    ``render_<cell_type>`` method; most of those methods raise
    NotImplementedError here and are expected to be provided by
    subclasses.
    """
    #__metaclass__ = DocStringInheritor
    #-------------------------------------------------------------------------
    # Class-level attributes determining the behaviour of the class but
    # probably not varying from instance to instance.
    #-------------------------------------------------------------------------
    default_encoding = 'utf-8'
    extension = str()
    blank_symbol = " "
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set in the constructor for this
    # class.
    #-------------------------------------------------------------------------
    infile = Unicode()

    highlight_source = Bool(True,
                            config=True,
                            help="Enable syntax highlighting for code blocks.")
    preamble = Unicode("",
                       config=True,
                       help="Path to a user-specified preamble file")

    infile_dir = Unicode()
    infile_root = Unicode()
    clean_name = Unicode()
    files_dir = Unicode()
    outbase = Unicode()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set by other methods in the base
    # class.
    #-------------------------------------------------------------------------
    figures_counter = 0
    output = Unicode()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are not actually mentioned further
    # in this class. TODO: Could they be usefully moved to a subclass?
    #-------------------------------------------------------------------------
    with_preamble = Bool(True, config=True)
    user_preamble = None
    raw_as_verbatim = False

    def __init__(self, infile='', config=None, exclude=None, **kw):
        """
        Set up the paths derived from `infile`.

        Parameters
        ----------
        infile : str
            Path of the notebook to convert.
        config : object, optional
            Forwarded to the parent class constructor.
        exclude : list, optional
            Stored as ``self.exclude_cells``.  When omitted, a new empty
            list is created for each instance.
        **kw
            Accepted but not used by this constructor.

        Notes
        -----
        The ``<clean_name>_files`` directory is created next to `infile`
        if it does not already exist.
        """
        super(Converter, self).__init__(config=config)
        # N.B. Initialized in the same order as defined above. Please try to
        # keep in this way for readability's sake.
        # A fresh list per instance: a literal [] default would be shared.
        self.exclude_cells = [] if exclude is None else exclude
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        self.infile_root = os.path.splitext(infile_root)[0]
        self.clean_name = clean_filename(self.infile_root)
        # Handle the creation of a directory for ancillary files, for
        # formats that need one.
        files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, self.infile_root)

    def __del__(self):
        """Remove the ancillary files directory if it exists and is empty."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def _get_prompt_number(self, cell):
        """Return ``cell.prompt_number``, or `blank_symbol` if it is absent."""
        return getattr(cell, 'prompt_number', self.blank_symbol)

    def dispatch(self, cell_type):
        """Return the cell_type dependent render method, e.g. render_code.

        Falls back to `render_unknown` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """
        Return the output format dependent render method, for example
        render_display_format_text.

        Falls back to `render_unknown_display` when no
        ``render_display_format_<format>`` attribute exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert a notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses
        of the Converter class do not need to re-implement this method, but
        just need implementations for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
            Character or string to join cells with. Default is a newline.

        Returns
        -------
        out : string
            Header, body and footer lines joined with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """
        Convert every cell of every worksheet in ``self.nb``.

        Parameters
        ----------
        cell_separator : string
            String placed between the converted cells.

        Returns
        -------
        cell_lines : list
            The converted cells joined with `cell_separator` and then split
            on newlines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        return cell_separator.join(converted_cells).split('\n')

    def render(self):
        """Read (if not already read), convert, and save self.infile.

        Returns the value returned by `save`.

        Raises
        ------
        ConversionException
            If the converted output is not a unicode string.
        """
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        # type(u'') is `unicode` on Python 2 and `str` on Python 3, so this
        # check does not depend on the Python-2-only name `unicode`.
        if not isinstance(self.output, type(u'')):
            raise ConversionException(
                'converted output must be unicode, got %r' % type(self.output))
        return self.save()

    def read(self):
        """Read and parse the notebook into a NotebookNode called self.nb."""
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """
        Write ``self.output`` to a file.

        Parameters
        ----------
        outfile : str, optional
            Destination path. Defaults to ``self.outbase + '.' +
            self.extension``.
        encoding : str, optional
            Encoding of the written file. Defaults to
            ``self.default_encoding``.

        Returns
        -------
        path : str
            Absolute path of the written file.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        Parameters
        ----------
        data : str
            Figure content. For 'png', 'jpg' and 'pdf' it is base64-decoded
            before being written; for any other format it is written as
            text using ``self.default_encoding``.
        fmt : str
            Format name, also used as the file extension.

        Returns
        -------
        fullname : str
            ``self.files_dir`` joined with the generated figure file name.
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            # Imported here so this method does not depend on a change to
            # the module-level imports. str.decode('base64') exists only
            # on Python 2; base64.b64decode works on both 2 and 3.
            import base64
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(data))
        else:
            with codecs.open(fullname, 'wb', self.default_encoding) as f:
                f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        The first format of `display_data_priority` present in `output` is
        used; if none is present, the first key of `output` other than
        'output_type' is used.

        Returns list.

        Raises
        ------
        RuntimeError
            If `output` has no key other than 'output_type'.
        """
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            # No preferred format available: take whatever else is there.
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ('png', 'svg', 'jpg', 'pdf'):
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Logs a warning and passes the pretty-printed cell to
        `_unknown_lines`.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        `type` is unused. It is optional because `render_display_data`
        calls the method returned by `dispatch_display_format` with
        `output` as the only argument.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError