##// END OF EJS Templates
test_file extractor
test_file extractor

File last commit:

r8984:8e16bbc4
r9217:7d835e8e
Show More
base.py
432 lines | 14.2 KiB | text/x-python | PythonLexer
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
"""Base classes for the notebook conversion pipeline.

This module defines Converter, from which all objects designed to implement
a conversion of IPython notebooks to some other format should inherit.
"""
#-----------------------------------------------------------------------------
# Copyright (c) 2012, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
Matthias BUSSONNIER
pylinting 2
r8627 from __future__ import print_function, absolute_import
Matthias BUSSONNIER
latex working
r8618
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 # Stdlib imports
Matthias BUSSONNIER
latex working
r8618 import codecs
import io
import logging
import os
import pprint
Rick Lupton
LaTeX converter: remove problematic characters from filenames
r8751 import re
Matthias BUSSONNIER
latex working
r8618 from types import FunctionType
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 # IPython imports
Matthias BUSSONNIER
latex working
r8618 from IPython.nbformat import current as nbformat
Matthias BUSSONNIER
working config
r8980 from IPython.config.configurable import Configurable, SingletonConfigurable
Matthias BUSSONNIER
Allow to build a converter without input file
r8982 from IPython.utils.traitlets import (List, Unicode, Type, Bool, Dict, CaselessStrEnum,
Any)
Matthias BUSSONNIER
latex working
r8618
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 # Our own imports
Anthony Scopatz
convters sub-package use relative imports
r8933 from .utils import remove_fake_files_url
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789
#-----------------------------------------------------------------------------
# Local utilities
#-----------------------------------------------------------------------------
Matthias BUSSONNIER
latex working
r8618
Rick Lupton
LaTeX converter: remove problematic characters from filenames
def clean_filename(filename):
    """
    Replace every character that is not an ASCII letter, digit or underscore.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        Copy of `filename` in which each disallowed character has been
        substituted by a single underscore.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9_]')
    return disallowed.sub('_', filename)
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789
Matthias BUSSONNIER
latex working
r8618 #-----------------------------------------------------------------------------
# Class declarations
#-----------------------------------------------------------------------------
class ConversionException(Exception):
    """Exception type declared for the conversion pipeline.

    Adds nothing to the built-in ``Exception``.
    """
David Warde-Farley
PEP8-ify several files
r8747
Matthias BUSSONNIER
latex working
class DocStringInheritor(type):
    """
    Metaclass that lets undocumented methods inherit a base-class docstring.

    For every plain function in the new class body that has no docstring of
    its own, the direct bases are searched in order for an attribute with
    the same name; the first one carrying a non-empty docstring donates it.
    A docstring written on the derived method is never overwritten.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        """Build the class, filling in missing method docstrings from bases."""
        newClassDict = {}
        for attributeName, attribute in classDict.items():
            # Only plain functions without their own docstring are touched;
            # an explicit docstring on the derived method always wins.
            if isinstance(attribute, FunctionType) and not attribute.__doc__:
                # look through bases for matching function by name
                for baseclass in bases:
                    if not hasattr(baseclass, attributeName):
                        continue
                    inherited = getattr(baseclass, attributeName).__doc__
                    if inherited:
                        attribute.__doc__ = inherited
                        break
            newClassDict[attributeName] = attribute
        return type.__new__(meta, classname, bases, newClassDict)
David Warde-Farley
PEP8-ify several files
r8747
Matthias BUSSONNIER
working config
class Converter(Configurable):
    # Base class for notebook converters: class-level defaults plus the
    # configurable traits declared below.
    #__metaclass__ = DocStringInheritor

    #-------------------------------------------------------------------------
    # Class-level attributes determining the behaviour of the class but
    # probably not varying from instance to instance.
    #-------------------------------------------------------------------------
    default_encoding = 'utf-8'
    extension = ''
    blank_symbol = " "
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']

    #-------------------------------------------------------------------------
    # Instance-level attributes that are set in the constructor for this
    # class.
    #-------------------------------------------------------------------------
    infile = Any()

    highlight_source = Bool(
        True,
        config=True,
        help="Enable syntax highlighting for code blocks.",
    )

    preamble = Unicode(
        "",
        config=True,
        help="Path to a user-specified preamble file",
    )

    extract_figures = Bool(
        True,
        config=True,
        help="""extract base-64 encoded figures of the notebook into separate files,
replace by link to corresponding file in source.""",
    )

    infile_dir = Unicode()
    infile_root = Unicode()
    clean_name = Unicode()
    files_dir = Unicode()
    outbase = Unicode()

    #-------------------------------------------------------------------------
    # Instance-level attributes that are set by other methods in the base
    # class.
    #-------------------------------------------------------------------------
    figures_counter = 0
    output = Unicode()

    #-------------------------------------------------------------------------
    # Instance-level attributes that are not actually mentioned further
    # in this class. TODO: Could they be usefully moved to a subclass?
    #-------------------------------------------------------------------------
    with_preamble = Bool(True, config=True)
    user_preamble = None
    raw_as_verbatim = False
David Warde-Farley
PEP8
r8718
Matthias BUSSONNIER
working config
r8980
Matthias BUSSONNIER
missing comma
def __init__(self, infile=None, config=None, exclude=None, **kw):
    """Set up the converter, optionally bound to an input notebook file.

    Parameters
    ----------
    infile : str, optional
        Path of the notebook to convert. When given, the derived path
        attributes (``infile_dir``, ``infile_root``, ``clean_name``,
        ``files_dir``, ``outbase``) are filled in and the ancillary
        ``<clean_name>_files`` directory is created if it is missing.
    config : optional
        Passed to the ``Configurable`` constructor as ``config``.
    exclude : list, optional
        Stored as ``self.exclude_cells``. Defaults to a fresh empty list
        for every instance.
    **kw
        Accepted but not used by this constructor.
    """
    super(Converter, self).__init__(config=config)
    # N.B. Initialized in the same order as defined above. Please try to
    # keep in this way for readability's sake.
    # A None default avoids sharing one mutable list between all instances.
    self.exclude_cells = [] if exclude is None else exclude
    self.infile = infile
    if infile:
        self.infile_dir, infile_root = os.path.split(infile)
        self.infile_root = os.path.splitext(infile_root)[0]
        self.clean_name = clean_filename(self.infile_root)
        # Handle the creation of a directory for ancillary files, for
        # formats that need one.
        files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, self.infile_root)
Matthias BUSSONNIER
latex working
r8618
def __del__(self):
    """Remove ``self.files_dir`` on finalization, but only if it is empty."""
    target = self.files_dir
    if not os.path.isdir(target):
        return
    if os.listdir(target):
        # Something was written there; keep the directory.
        return
    os.rmdir(target)
Maximilian Albert
Replace ad-hoc/broken code with safe method to extract the prompt_number of a cell....
def _get_prompt_number(self, cell):
    """Return ``cell.prompt_number``, or ``self.blank_symbol`` if the cell
    has no such attribute."""
    if hasattr(cell, 'prompt_number'):
        return cell.prompt_number
    return self.blank_symbol
Matthias BUSSONNIER
latex working
def dispatch(self, cell_type):
    """Look up the render method for `cell_type`.

    Returns the attribute named ``render_<cell_type>`` (for example
    ``render_code``), or ``self.render_unknown`` when there is none.
    """
    method_name = 'render_' + cell_type
    fallback = self.render_unknown
    return getattr(self, method_name, fallback)
def dispatch_display_format(self, format):
    """
    Look up the render method for an output format.

    Returns the attribute named ``render_display_format_<format>`` (for
    example ``render_display_format_text``), or
    ``self.render_unknown_display`` when there is none.
    """
    method_name = 'render_display_format_' + format
    fallback = self.render_unknown_display
    return getattr(self, method_name, fallback)
Matthias BUSSONNIER
latex working
r8618
def convert(self, cell_separator='\n'):
    """
    Convert the notebook to a single string.

    The result is the optional header, the main body and the optional
    footer, in that order, joined with newlines. The body is produced by
    dispatching on each cell's type, so subclasses of Converter do not need
    to re-implement this method; they only implement the dispatched methods.

    Parameters
    ----------
    cell_separator : string
        Character or string to join cells with. Default is "\n"

    Returns
    -------
    out : string
    """
    pieces = []
    pieces += self.optional_header()
    pieces += self.main_body(cell_separator)
    pieces += self.optional_footer()
    return u'\n'.join(pieces)
def main_body(self, cell_separator='\n'):
    """Render every cell of every worksheet in ``self.nb``.

    Each cell is rendered by the method ``self.dispatch`` returns for its
    type; markdown and raw cells are first passed through
    ``remove_fake_files_url``. The rendered cells are joined with
    `cell_separator` and the result is split back into a list of lines.
    """
    rendered = []
    for sheet in self.nb.worksheets:
        for cell in sheet.cells:
            kind = cell.cell_type
            renderer = self.dispatch(kind)
            if kind in ('markdown', 'raw'):
                remove_fake_files_url(cell)
            rendered.append('\n'.join(renderer(cell)))
    joined = cell_separator.join(rendered)
    return joined.split('\n')
def render(self):
    """Read (if needed), convert, and save ``self.infile``.

    ``self.read()`` is called only when no ``nb`` attribute exists yet. The
    converted text is stored in ``self.output`` and then written out by
    ``self.save()``.

    Returns whatever ``self.save()`` returns.

    Raises
    ------
    AssertionError
        If the converted output is not a unicode string.
    """
    if not hasattr(self, 'nb'):
        self.read()
    self.output = self.convert()
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # check no longer depends on a Python-2-only builtin; isinstance also
    # accepts subclasses of the text type.
    assert isinstance(self.output, type(u'')), \
        'converted output must be a unicode string'
    return self.save()
def read(self):
    """Open ``self.infile`` and parse it as a 'json' notebook into ``self.nb``."""
    with open(self.infile) as handle:
        self.nb = nbformat.read(handle, 'json')
def save(self, outfile=None, encoding=None):
    """Write ``self.output`` to a file and return its absolute path.

    Parameters
    ----------
    outfile : str, optional
        Destination path. Defaults to ``self.outbase + '.' + self.extension``.
    encoding : str, optional
        Text encoding for the file. Defaults to ``self.default_encoding``.
    """
    if outfile is not None:
        target = outfile
    else:
        target = self.outbase + '.' + self.extension
    codec = self.default_encoding if encoding is None else encoding
    with io.open(target, 'w', encoding=codec) as stream:
        stream.write(self.output)
    return os.path.abspath(target)
def optional_header(self):
    """
    Lines to place at the top of the converted notebook.

    The base implementation contributes nothing. Returns a list.
    """
    return list()
def optional_footer(self):
    """
    Lines to place at the end of the converted notebook.

    The base implementation contributes nothing. Returns a list.
    """
    return list()
def _new_figure(self, data, fmt):
    """Create a new figure file in the given format.

    The file is named ``<clean_name>_fig_<NN>.<fmt>`` using the current
    ``self.figures_counter`` (which is then incremented) and is written
    inside ``self.files_dir``.

    Parameters
    ----------
    data : str
        Figure payload: base64-encoded for 'png', 'jpg' and 'pdf', plain
        text for any other format (SVG is already XML).
    fmt : str
        Format name, also used as the file extension.

    Returns
    -------
    fullname : str
        ``self.files_dir`` joined with the generated file name.
    """
    # Local import keeps this method self-contained; b64decode replaces the
    # Python-2-only ``str.decode('base64')`` codec.
    import base64

    figname = '%s_fig_%02i.%s' % (self.clean_name,
                                  self.figures_counter, fmt)
    self.figures_counter += 1
    fullname = os.path.join(self.files_dir, figname)

    # Binary files are base64-encoded, SVG is already XML
    if fmt in ('png', 'jpg', 'pdf'):
        # Decode before opening so bad input does not leave an empty file.
        payload = base64.b64decode(data)
        with open(fullname, 'wb') as f:
            f.write(payload)
    else:
        with codecs.open(fullname, 'wb', self.default_encoding) as f:
            f.write(data)

    return fullname
def render_heading(self, cell):
    """Convert a heading cell.

    Not implemented in the base class. Returns list."""
    raise NotImplementedError()
def render_code(self, cell):
    """Convert a code cell.

    Not implemented in the base class. Returns list."""
    raise NotImplementedError()
def render_markdown(self, cell):
    """Convert a markdown cell.

    Not implemented in the base class. Returns list."""
    raise NotImplementedError()
def _img_lines(self, img_file):
    """Return list of lines to include an image file.

    Not implemented in the base class.
    """
    # Note: subclasses may choose to implement format-specific _FMT_lines
    # methods if they so choose (FMT in {png, svg, jpg, pdf}).
    raise NotImplementedError()
def render_display_data(self, output):
    """Convert display data from the output of a code cell.

    Only one format is rendered: the first entry of
    ``self.display_data_priority`` present in `output`, otherwise the first
    key of `output` other than 'output_type'.

    Returns list.

    Raises
    ------
    RuntimeError
        If `output` holds no key besides 'output_type'.
    """
    fmt = next((name for name in self.display_data_priority
                if name in output), None)
    if fmt is None:
        # Nothing from the priority list: take whatever else is there.
        fmt = next((key for key in output if key != 'output_type'), None)
        if fmt is None:
            raise RuntimeError('no display data')

    # Is it an image?
    extract = fmt in ['png', 'svg', 'jpg', 'pdf'] and self.extract_figures
    if not extract:
        return self.dispatch_display_format(fmt)(output)

    img_file = self._new_figure(output[fmt], fmt)
    # Subclasses can have format-specific render functions (e.g.,
    # latex has to auto-convert all SVG to PDF first).
    lines_fun = getattr(self, '_%s_lines' % fmt, None) or self._img_lines
    return lines_fun(img_file)
def render_raw(self, cell):
    """Convert a cell with raw text.

    Not implemented in the base class. Returns list."""
    raise NotImplementedError()
def render_unknown(self, cell):
    """Render cells of unknown type.

    Logs a warning naming the cell type, then passes the pretty-printed
    cell to ``self._unknown_lines``.

    Returns list."""
    dumped = pprint.pformat(cell)
    logging.warning('Unknown cell: %s' % cell.cell_type)
    return self._unknown_lines(dumped)
def render_unknown_display(self, output, type=None):
    """Render an output whose display format has no dedicated method.

    Logs a warning naming ``output.output_type``, then passes the
    pretty-printed output to ``self._unknown_lines``.

    Parameters
    ----------
    output
        The output node to render.
    type : optional
        Unused. It now has a default because ``render_display_data`` calls
        the fallback returned by ``dispatch_display_format`` with the
        output as its only argument.

    Returns list."""
    data = pprint.pformat(output)
    logging.warning('Unknown output: %s' % output.output_type)
    return self._unknown_lines(data)
def render_stream(self, output):
    """Render the stream part of an output.

    Delegates to ``self.render_display_format_text``.

    Returns list.
    """
    text_renderer = self.render_display_format_text
    return text_renderer(output)
def render_pyout(self, output):
    """Convert the pyout part of a code cell.

    Not implemented in the base class. Returns list."""
    raise NotImplementedError()
def render_pyerr(self, output):
    """Convert the pyerr part of a code cell.

    Not implemented in the base class. Returns list."""
    raise NotImplementedError()
def _unknown_lines(self, data):
    """Return list of lines for an unknown cell.

    Not implemented in the base class.

    Parameters
    ----------
    data : str
        The content of the unknown data as a single string.
    """
    raise NotImplementedError()
# These are the possible format types in an output node
def render_display_format_text(self, output):
    """Render the text part of an output.

    Not implemented in the base class. Returns list.
    """
    raise NotImplementedError()
def render_display_format_html(self, output):
    """Render the html part of an output.

    Not implemented in the base class. Returns list.
    """
    raise NotImplementedError()
def render_display_format_latex(self, output):
    """Render the latex part of an output.

    Not implemented in the base class. Returns list.
    """
    raise NotImplementedError()
def render_display_format_json(self, output):
    """Render the json part of an output.

    Not implemented in the base class. Returns list.
    """
    raise NotImplementedError()
def render_display_format_javascript(self, output):
    """Render the javascript part of an output.

    Not implemented in the base class. Returns list.
    """
    raise NotImplementedError()