##// END OF EJS Templates
remove PDF post processor
remove PDF post processor

File last commit:

r15536:f51640d8
r16418:b55d2748
Show More
extractoutput.py
112 lines | 4.6 KiB | text/x-python | PythonLexer
Paul Ivanov
replace 'transformer' with 'preprocessor'
r12219 """Module containing a preprocessor that extracts all of the outputs from the
Jonathan Frederic
Rename ExtractFigureTransformer to ExtractOutputTransformer
r11634 notebook file. The extracted outputs are returned in the 'resources' dictionary.
Jonathan Frederic
Cleanup and refactor, transformers
r10674 """
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
MinRK
fix b64-handling of data in Python 3
r11549 import base64
Jonathan Frederic
Missing sys import
r11370 import sys
Jonathan Frederic
Move extracted files into their own subdir
r11631 import os
MinRK
fix file extension when extracting PDF figures
r15385 from mimetypes import guess_extension
MinRK
fix b64-handling of data in Python 3
r11549
Thomas Kluyver
Extract output preprocessor only extracts specified formats
r13921 from IPython.utils.traitlets import Unicode, Set
Paul Ivanov
replace 'transformer' with 'preprocessor'
r12219 from .base import Preprocessor
Jonathan Frederic
Fixes for Py3.3
r11547 from IPython.utils import py3compat
Jonathan Frederic
Split transformer code
r10437
Jonathan Frederic
Cleanup and refactor, transformers
r10674 #-----------------------------------------------------------------------------
# Classes
#-----------------------------------------------------------------------------
Paul Ivanov
replace 'transformer' with 'preprocessor'
class ExtractOutputPreprocessor(Preprocessor):
    """
    Extracts all of the outputs from the notebook file.  The extracted
    outputs are returned in the 'resources' dictionary.
    """

    # Template for the name of each extracted file; the fields are filled
    # in by preprocess_cell.
    output_filename_template = Unicode(
        "{unique_key}_{cell_index}_{index}{extension}", config=True)

    # Output keys that preprocess_cell looks for and extracts.
    extract_output_types = Set({'png', 'jpeg', 'svg', 'application/pdf'}, config=True)

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Extract the configured output formats of one cell into `resources`.

        For every output of the cell and every type listed in
        `extract_output_types` that the output contains, the data is
        converted to bytes, stored in ``resources['outputs']`` under a
        generated filename, and that filename is recorded on the output
        under the key ``<out_type>_filename``.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        cell_index : int
            Index of the cell being processed (see base.py)

        Returns
        -------
        (cell, resources) : tuple
            The same two objects that were passed in, modified in place.
        """

        # Get the unique key from the resource dict if it exists.  If it
        # does not exist, use 'output' as the default.  Also, get the files
        # directory if it has been specified.
        unique_key = resources.get('unique_key', 'output')
        output_files_dir = resources.get('output_files_dir', None)

        # Make sure the outputs key exists and holds a dict.  `.get` is used
        # so that a plain dict without the key does not raise KeyError; for
        # default-dict style resources the result is the same as indexing.
        if not isinstance(resources.get('outputs'), dict):
            resources['outputs'] = {}

        # Loop through all of the outputs in the cell
        for index, out in enumerate(cell.get('outputs', [])):

            # Get the output in data formats that the template needs extracted
            for out_type in self.extract_output_types:
                if out_type not in out:
                    continue

                data = out[out_type]

                # Binary files are base64-encoded, SVG is already XML
                if out_type in {'png', 'jpeg', 'application/pdf'}:
                    # data is b64-encoded as text (str, unicode), while the
                    # decoder only accepts bytes.  b64decode is used because
                    # base64.decodestring was removed in Python 3.9.
                    data = py3compat.cast_bytes(data)
                    data = base64.b64decode(data)
                elif sys.platform == 'win32':
                    data = data.replace('\n', '\r\n').encode("UTF-8")
                else:
                    data = data.encode("UTF-8")

                # Build an output name.
                # HACK: some output keys are mimetypes and some are bare
                # format names, so the extension is derived differently.
                if '/' in out_type:
                    ext = guess_extension(out_type)
                    if ext is None:
                        # Unknown mimetype: fall back to its subtype.
                        ext = '.' + out_type.rsplit('/', 1)[-1]
                else:
                    ext = '.' + out_type

                filename = self.output_filename_template.format(
                    unique_key=unique_key,
                    cell_index=cell_index,
                    index=index,
                    extension=ext)

                # On the cell, make the figure available via
                #   cell.outputs[i].svg_filename  ... etc (svg in example)
                # Where
                #   cell.outputs[i].svg  contains the data
                if output_files_dir is not None:
                    filename = os.path.join(output_files_dir, filename)
                out[out_type + '_filename'] = filename

                # In the resources, make the figure available via
                #   resources['outputs']['filename'] = data
                resources['outputs'][filename] = data

        return cell, resources