upstream/ipython Commit - r8982:0d6ebd24

Allow to build a converter without input file

Matthias BUSSONNIER -

r8982:0d6ebd24

parent child

converters/base.py

0 +16 -13

              """Base classes for the notebook conversion pipeline.
              This module defines Converter, from which all objects designed to implement
              a conversion of IPython notebooks to some other format should inherit.
              """
              #-----------------------------------------------------------------------------
              # Copyright (c) 2012, the IPython Development Team.
              #
              # Distributed under the terms of the Modified BSD License.
              #
              # The full license is in the file COPYING.txt, distributed with this software.
              #-----------------------------------------------------------------------------
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
              from __future__ import print_function, absolute_import
              # Stdlib imports
              import codecs
              import io
              import logging
              import os
              import pprint
              import re
              from types import FunctionType
              # IPython imports
              from IPython.nbformat import current as nbformat
              from IPython.config.configurable import Configurable, SingletonConfigurable
-             from IPython.utils.traitlets import List, Unicode, Type, Bool, Dict, CaselessStrEnum
+             from IPython.utils.traitlets import (List, Unicode, Type, Bool, Dict, CaselessStrEnum,
+                                                 Any)
              # Our own imports
              from .utils import remove_fake_files_url
              #-----------------------------------------------------------------------------
              # Local utilities
              #-----------------------------------------------------------------------------
              def clean_filename(filename):
                  """
                  Replace every character that is not an ASCII letter, a digit or an
                  underscore with an underscore.
                  Parameters
                  ----------
                  filename : str
                      The name to sanitize.
                  Returns
                  -------
                  clean : str
                      ``filename`` with each character outside ``[a-zA-Z0-9_]``
                      substituted by ``_``; the length is unchanged.
                  """
                  return re.sub(r'[^a-zA-Z0-9_]', '_', filename)
              #-----------------------------------------------------------------------------
              # Class declarations
              #-----------------------------------------------------------------------------
              class ConversionException(Exception):
                  """Exception type declared by the conversion module; adds nothing to Exception."""
              class DocStringInheritor(type):
                  """
                  Metaclass that copies missing method docstrings from base classes.
                  For each plain function in the class body that has no docstring of
                  its own, the bases are searched in order for an attribute of the
                  same name; the first non-empty docstring found is attached to the
                  new function. A docstring written on the derived method itself is
                  never replaced.
                  Please use carefully, this follows Michael Foord's Metaclass tutorial
                  (http://www.voidspace.org.uk/python/articles/metaclasses.shtml).
                  source:
                  http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
                  by Paul McGuire
                  """
                  def __new__(meta, classname, bases, classDict):
                      """Create the class after filling in inherited docstrings.
                      Parameters
                      ----------
                      classname : str
                          Name of the class being created.
                      bases : tuple
                          Base classes searched, in order, for documented methods.
                      classDict : dict
                          Namespace of the class body; it is copied, not reused.
                      """
                      for attributeName, attribute in classDict.items():
                          # Only plain functions lacking a docstring are candidates;
                          # an explicit docstring on the derived method wins.
                          if not isinstance(attribute, FunctionType) or attribute.__doc__:
                              continue
                          # look through bases for matching function by name
                          for baseclass in bases:
                              if hasattr(baseclass, attributeName):
                                  basedoc = getattr(baseclass, attributeName).__doc__
                                  if basedoc:
                                      attribute.__doc__ = basedoc
                                      break
                      return type.__new__(meta, classname, bases, dict(classDict))
              class Converter(Configurable):
                  #__metaclass__ = DocStringInheritor
                  #-------------------------------------------------------------------------
                  # Class-level attributes determining the behaviour of the class but
                  # probably not varying from instance to instance.
                  #-------------------------------------------------------------------------
                  default_encoding = 'utf-8'
                  extension = str()
                  blank_symbol = " "
                  # Which display data format is best? Subclasses can override if
                  # they have specific requirements.
                  display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']
                  #-------------------------------------------------------------------------
                  # Instance-level attributes that are set in the constructor for this
                  # class.
                  #-------------------------------------------------------------------------
-                 infile = Unicode()
+                 infile = Any()
                  highlight_source = Bool(True,
                                   config=True,
                                   help="Enable syntax highlighting for code blocks.")
-                 preamble = Unicode("" ,
+                 preamble = Unicode( "" ,
                                      config=True,
                                      help="Path to a user-specified preamble file")
                  extract_figures = Bool( True,
                                          config=True,
                                          help="""extract base-64 encoded figures of the notebook into separate files,
                                               replace by link to corresponding file in source.""")
                  infile_dir = Unicode()
                  infile_root = Unicode()
                  clean_name = Unicode()
                  files_dir = Unicode()
                  outbase = Unicode()
                  #-------------------------------------------------------------------------
                  # Instance-level attributes that are set by other methods in the base
                  # class.
                  #-------------------------------------------------------------------------
                  figures_counter = 0
                  output = Unicode()
                  #-------------------------------------------------------------------------
                  # Instance-level attributes that are not actually mentioned further
                  # in this class. TODO: Could they be usefully moved to a subclass?
                  #-------------------------------------------------------------------------
                  with_preamble = Bool(True,config=True)
                  user_preamble = None
                  raw_as_verbatim = False
                  def __init__(self, infile=None, config=None, exclude=[], **kw):
                      super(Converter,self).__init__(config=config)
                      #DocStringInheritor.__init__(self=config)
                      # N.B. Initialized in the same order as defined above. Please try to
                      # keep in this way for readability's sake.
                      self.exclude_cells = exclude
                      self.infile = infile
-                     self.infile_dir, infile_root = os.path.split(infile)
-                     self.infile_root = os.path.splitext(infile_root)[0]
-                     self.clean_name = clean_filename(self.infile_root)
-                     # Handle the creation of a directory for ancillary files, for
-                     # formats that need one.
-                     files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
-                     if not os.path.isdir(files_dir):
-                         os.mkdir(files_dir)
-                     self.files_dir = files_dir
-                     self.outbase = os.path.join(self.infile_dir, self.infile_root)
+                     if infile:
+                         self.infile_dir, infile_root = os.path.split(infile)
+                         self.infile_root = os.path.splitext(infile_root)[0]
+                         self.clean_name = clean_filename(self.infile_root)
+                         # Handle the creation of a directory for ancillary files, for
+                         # formats that need one.
+                         files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
+                         if not os.path.isdir(files_dir):
+                             os.mkdir(files_dir)
+                         self.files_dir = files_dir
+                         self.outbase = os.path.join(self.infile_dir, self.infile_root)
                  def __del__(self):
                      """Remove the ancillary files directory when it exists and is empty."""
                      if not os.path.isdir(self.files_dir):
                          return
                      if os.listdir(self.files_dir):
                          # Something was written there: keep the directory.
                          return
                      os.rmdir(self.files_dir)
                  def _get_prompt_number(self, cell):
                      """Return ``cell.prompt_number``, or ``self.blank_symbol`` if the cell has none."""
                      if hasattr(cell, 'prompt_number'):
                          return cell.prompt_number
                      return self.blank_symbol
                  def dispatch(self, cell_type):
                      """Return the render method matching ``cell_type``.
                      The method looked up is ``render_<cell_type>`` (for example
                      render_code); ``render_unknown`` is returned when no such
                      attribute exists.
                      """
                      method_name = 'render_' + cell_type
                      return getattr(self, method_name, self.render_unknown)
                  def dispatch_display_format(self, format):
                      """Return the render method matching an output format.
                      The method looked up is ``render_display_format_<format>`` (for
                      example render_display_format_text); ``render_unknown_display``
                      is returned when no such attribute exists.
                      """
                      method_name = 'render_display_format_' + format
                      return getattr(self, method_name, self.render_unknown_display)
                  def convert(self, cell_separator='\n'):
                      """
                      Generic method converting the notebook to a string representation.
                      The work is done by dispatching on the cell_type, so subclasses of
                      the Converter class do not need to re-implement this method; they
                      only need to implement the methods that get dispatched to.
                      Parameters
                      ----------
                      cell_separator : string
                        Character or string to join cells with. Default is "\n"
                      Returns
                      -------
                      out : string
                        Header, body and footer lines joined with newlines.
                      """
                      pieces = list(self.optional_header())
                      pieces += self.main_body(cell_separator)
                      pieces += self.optional_footer()
                      return u'\n'.join(pieces)
                  def main_body(self, cell_separator='\n'):
                      """Render every cell of every worksheet in ``self.nb``.
                      Each cell is rendered by the method ``dispatch`` selects for its
                      cell_type; markdown and raw cells first go through
                      ``remove_fake_files_url``. The rendered cells are joined with
                      ``cell_separator`` and the result is returned as a list of lines.
                      """
                      rendered = []
                      for sheet in self.nb.worksheets:
                          for cell in sheet.cells:
                              render_fn = self.dispatch(cell.cell_type)
                              if cell.cell_type in ('markdown', 'raw'):
                                  remove_fake_files_url(cell)
                              cell_text = '\n'.join(render_fn(cell))
                              rendered.append(cell_text)
                      return cell_separator.join(rendered).split('\n')
                  def render(self):
                      """Read (if needed), convert, and save self.infile.
                      The notebook is only read when no ``nb`` attribute is present
                      yet. Returns whatever ``save`` returns.
                      """
                      already_loaded = hasattr(self, 'nb')
                      if not already_loaded:
                          self.read()
                      self.output = self.convert()
                      # convert() is expected to hand back a unicode string
                      assert type(self.output) is unicode
                      return self.save()
                  def read(self):
                      """Open self.infile and parse it as JSON into self.nb."""
                      with open(self.infile) as handle:
                          self.nb = nbformat.read(handle, 'json')
                  def save(self, outfile=None, encoding=None):
                      """Write self.output to a file and return its absolute path.
                      Parameters
                      ----------
                      outfile : string, optional
                        Destination path. Defaults to ``self.outbase`` followed by a
                        dot and ``self.extension``.
                      encoding : string, optional
                        Encoding of the written file. Defaults to
                        ``self.default_encoding``.
                      """
                      target = outfile
                      if target is None:
                          target = self.outbase + '.' + self.extension
                      codec = self.default_encoding if encoding is None else encoding
                      with io.open(target, 'w', encoding=codec) as stream:
                          stream.write(self.output)
                      return os.path.abspath(target)
                  def optional_header(self):
                      """
                      Optional header to insert at the top of the converted notebook.
                      The base implementation contributes nothing; ``convert`` extends
                      its line list with the value returned here.
                      Returns a list (empty in this base class).
                      """
                      return []
                  def optional_footer(self):
                      """
                      Optional footer to insert at the end of the converted notebook.
                      The base implementation contributes nothing; ``convert`` extends
                      its line list with the value returned here.
                      Returns a list (empty in this base class).
                      """
                      return []
                  def _new_figure(self, data, fmt):
                      """Write a new figure file in the given format.
                      The file is named ``<clean_name>_fig_<NN>.<fmt>`` using the current
                      ``figures_counter`` (which is then incremented) and is placed in
                      ``self.files_dir``.
                      Returns the path of the written file (``files_dir`` joined with
                      the generated name).
                      """
                      figname = '%s_fig_%02i.%s' % (self.clean_name,
                                                    self.figures_counter, fmt)
                      self.figures_counter += 1
                      fullname = os.path.join(self.files_dir, figname)
                      is_binary = fmt in ('png', 'jpg', 'pdf')
                      if is_binary:
                          # Binary files are base64-encoded
                          data = data.decode('base64')
                          handle = open(fullname, 'wb')
                      else:
                          # SVG is already XML: write it through an encoding writer
                          handle = codecs.open(fullname, 'wb', self.default_encoding)
                      with handle as f:
                          f.write(data)
                      return fullname
                  def render_heading(self, cell):
                      """Convert a heading cell.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list."""
                      raise NotImplementedError
                  def render_code(self, cell):
                      """Convert a code cell.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list."""
                      raise NotImplementedError
                  def render_markdown(self, cell):
                      """Convert a markdown cell.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list."""
                      raise NotImplementedError
                  def _img_lines(self, img_file):
                      """Return list of lines to include an image file.
                      Used by render_display_data as the fallback when no
                      format-specific ``_<fmt>_lines`` method exists. Not implemented
                      in this base class (raises NotImplementedError).
                      """
                      # Note: subclasses may choose to implement format-specific _FMT_lines
                      # methods if they so choose (FMT in {png, svg, jpg, pdf}).
                      raise NotImplementedError
                  def render_display_data(self, output):
                      """Convert display data from the output of a code cell.
                      The format is the first entry of ``display_data_priority`` present
                      in ``output``; failing that, the first key of ``output`` other
                      than 'output_type'. RuntimeError is raised when neither exists.
                      Image formats are written to a figure file when
                      ``extract_figures`` is set; everything else goes through
                      ``dispatch_display_format``.
                      Returns list.
                      """
                      missing = object()
                      fmt = next((name for name in self.display_data_priority
                                  if name in output), missing)
                      if fmt is missing:
                          # No preferred format available: take any real data key.
                          fmt = next((key for key in output if key != 'output_type'),
                                     missing)
                          if fmt is missing:
                              raise RuntimeError('no display data')
                      # Is it an image?
                      if fmt in ('png', 'svg', 'jpg', 'pdf') and self.extract_figures:
                          print('I will extract this', fmt)
                          img_file = self._new_figure(output[fmt], fmt)
                          # Subclasses can have format-specific render functions (e.g.,
                          # latex has to auto-convert all SVG to PDF first).
                          renderer = getattr(self, '_%s_lines' % fmt, None) or self._img_lines
                          return renderer(img_file)
                      print('I will NOT extract this', fmt)
                      renderer = self.dispatch_display_format(fmt)
                      return renderer(output)
                  def render_raw(self, cell):
                      """Convert a cell with raw text.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list."""
                      raise NotImplementedError
                  def render_unknown(self, cell):
                      """Render cells of unknown type.
                      Logs a warning naming the cell type and hands the pretty-printed
                      cell to ``_unknown_lines``.
                      Returns list."""
                      dumped = pprint.pformat(cell)
                      message = 'Unknown cell: %s' % cell.cell_type
                      logging.warning(message)
                      return self._unknown_lines(dumped)
                  def render_unknown_display(self, output, type=None):
                      """Render outputs of unknown display format.
                      This is the fallback returned by ``dispatch_display_format``, and
                      ``render_display_data`` calls that result with ``output`` only, so
                      ``type`` must be optional.
                      Parameters
                      ----------
                      output : notebook output node
                        Pretty-printed in full; its ``output_type`` is named in the
                        logged warning.
                      type : optional
                        Unused; kept so that callers passing it keep working.
                      Returns list."""
                      data = pprint.pformat(output)
                      logging.warning('Unknown output: %s' % output.output_type)
                      return self._unknown_lines(data)
                  def render_stream(self, output):
                      """Render the stream part of an output.
                      Delegates to render_display_format_text and returns its result
                      unchanged.
                      Returns list.
                      """
                      return self.render_display_format_text(output)
                  def render_pyout(self, output):
                      """Convert the pyout part of a code cell.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list."""
                      raise NotImplementedError
                  def render_pyerr(self, output):
                      """Convert the pyerr part of a code cell.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list."""
                      raise NotImplementedError
                  def _unknown_lines(self, data):
                      """Return list of lines for an unknown cell.
                      Called by render_unknown and render_unknown_display with the
                      pretty-printed node. Not implemented in this base class (raises
                      NotImplementedError).
                      Parameters
                      ----------
                      data : str
                        The content of the unknown data as a single string.
                      """
                      raise NotImplementedError
                  # These are the possible format types in an output node
                  def render_display_format_text(self, output):
                      """Render the text part of an output.
                      Also used by render_stream. Not implemented in this base class
                      (raises NotImplementedError).
                      Returns list.
                      """
                      raise NotImplementedError
                  def render_display_format_html(self, output):
                      """Render the html part of an output.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list.
                      """
                      raise NotImplementedError
                  def render_display_format_latex(self, output):
                      """Render the latex part of an output.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list.
                      """
                      raise NotImplementedError
                  def render_display_format_json(self, output):
                      """Render the json part of an output.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list.
                      """
                      raise NotImplementedError
                  def render_display_format_javascript(self, output):
                      """Render the javascript part of an output.
                      Not implemented in this base class (raises NotImplementedError).
                      Returns list.
                      """
                      raise NotImplementedError

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages