##// END OF EJS Templates
set default mimetype to octet-stream for binary files...
set default mimetype to octet-stream for binary files; mimetype is only specified if content=True

File last commit:

r18602:2377691a
r19346:22350094
Show More
extractoutput.py
98 lines | 4.0 KiB | text/x-python | PythonLexer
MinRK
update nbconvert to nbformat 4
r18580 """A preprocessor that extracts all of the outputs from the
Jonathan Frederic
Rename ExtractFigureTransformer to ExtractOutputTransformer
r11634 notebook file. The extracted outputs are returned in the 'resources' dictionary.
Jonathan Frederic
Cleanup and refactor, transformers
r10674 """
MinRK
update nbconvert to nbformat 4
r18580 # Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
Jonathan Frederic
Cleanup and refactor, transformers
r10674
MinRK
fix b64-handling of data in Python 3
r11549 import base64
Jonathan Frederic
Missing sys import
r11370 import sys
Jonathan Frederic
Move extracted files into their own subdir
r11631 import os
MinRK
fix file extension when extracting PDF figures
r15385 from mimetypes import guess_extension
MinRK
fix b64-handling of data in Python 3
r11549
Thomas Kluyver
Extract output preprocessor only extracts specified formats
r13921 from IPython.utils.traitlets import Unicode, Set
Paul Ivanov
replace 'transformer' with 'preprocessor'
r12219 from .base import Preprocessor
Jonathan Frederic
Fixes for Py3.3
r11547 from IPython.utils import py3compat
Jonathan Frederic
Split transformer code
r10437
Jonathan Frederic
Cleanup and refactor, transformers
r10674
Paul Ivanov
replace 'transformer' with 'preprocessor'
class ExtractOutputPreprocessor(Preprocessor):
    """
    Extracts all of the outputs from the notebook file.  The extracted
    outputs are returned in the 'resources' dictionary.
    """

    # Template for the name of each extracted file; formatted with the
    # keys unique_key, cell_index, index and extension.
    output_filename_template = Unicode(
        "{unique_key}_{cell_index}_{index}{extension}", config=True)

    # Mime types that are pulled out of the outputs; other types are ignored.
    extract_output_types = Set({'image/png', 'image/jpeg', 'image/svg+xml', 'application/pdf'}, config=True)

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Apply a transformation on each cell,

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        cell_index : int
            Index of the cell being processed (see base.py)

        Returns
        -------
        (cell, resources) : tuple
            The cell, whose matching outputs gained a
            ``metadata['filenames'][mime_type]`` entry, and the resources
            dictionary, whose ``'outputs'`` mapping gained one
            ``filename -> bytes`` entry per extracted output.
        """

        # Get the unique key from the resource dict if it exists.  If it does
        # not exist, use 'output' as the default.  Also, get files directory
        # if it has been specified.
        unique_key = resources.get('unique_key', 'output')
        output_files_dir = resources.get('output_files_dir', None)

        # Make sure the outputs key exists and is a dict.  ``.get`` is used
        # instead of indexing so that a plain dict without the key works as
        # well as a defaultdict (whose default value is not a dict either).
        if not isinstance(resources.get('outputs'), dict):
            resources['outputs'] = {}

        # These types are stored base64-encoded; SVG is already XML text.
        binary_mime_types = {'image/png', 'image/jpeg', 'application/pdf'}

        # Loop through all of the outputs in the cell
        for index, out in enumerate(cell.get('outputs', [])):
            if out.output_type not in {'display_data', 'execute_result'}:
                continue

            # Get the output in data formats that the template needs extracted
            for mime_type in self.extract_output_types:
                if mime_type not in out.data:
                    continue
                data = out.data[mime_type]

                if mime_type in binary_mime_types:
                    # data is b64-encoded as text (str, unicode); the decoder
                    # is fed bytes.  b64decode replaces base64.decodestring,
                    # a deprecated alias that was removed in Python 3.9.
                    data = py3compat.cast_bytes(data)
                    data = base64.b64decode(data)
                elif sys.platform == 'win32':
                    # Text output on Windows: convert newlines to CRLF
                    # before encoding.
                    data = data.replace('\n', '\r\n').encode("UTF-8")
                else:
                    data = data.encode("UTF-8")

                # Fall back to the mime subtype when the mimetypes registry
                # has no extension for this type.
                ext = guess_extension(mime_type)
                if ext is None:
                    ext = '.' + mime_type.rsplit('/')[-1]

                filename = self.output_filename_template.format(
                    unique_key=unique_key,
                    cell_index=cell_index,
                    index=index,
                    extension=ext)

                # On the cell, make the figure available via
                #   cell.outputs[i].metadata.filenames['mime/type']
                # where
                #   cell.outputs[i].data['mime/type'] contains the data
                if output_files_dir is not None:
                    filename = os.path.join(output_files_dir, filename)
                out.metadata.setdefault('filenames', {})
                out.metadata['filenames'][mime_type] = filename

                # In the resources, make the figure available via
                #   resources['outputs']['filename'] = data
                resources['outputs'][filename] = data

        return cell, resources