extractoutput.py
112 lines
| 4.6 KiB
| text/x-python
|
PythonLexer
Paul Ivanov
|
r12219 | """Module containing a preprocessor that extracts all of the outputs from the | ||
Jonathan Frederic
|
r11634 | notebook file. The extracted outputs are returned in the 'resources' dictionary. | ||
Jonathan Frederic
|
r10674 | """ | ||
#----------------------------------------------------------------------------- | ||||
# Copyright (c) 2013, the IPython Development Team. | ||||
# | ||||
# Distributed under the terms of the Modified BSD License. | ||||
# | ||||
# The full license is in the file COPYING.txt, distributed with this software. | ||||
#----------------------------------------------------------------------------- | ||||
#----------------------------------------------------------------------------- | ||||
# Imports | ||||
#----------------------------------------------------------------------------- | ||||
MinRK
|
r11549 | import base64 | ||
Jonathan Frederic
|
r11370 | import sys | ||
Jonathan Frederic
|
r11631 | import os | ||
MinRK
|
r15385 | from mimetypes import guess_extension | ||
MinRK
|
r11549 | |||
Thomas Kluyver
|
r13921 | from IPython.utils.traitlets import Unicode, Set | ||
Paul Ivanov
|
r12219 | from .base import Preprocessor | ||
Jonathan Frederic
|
r11547 | from IPython.utils import py3compat | ||
Jonathan Frederic
|
r10437 | |||
Jonathan Frederic
|
r10674 | #----------------------------------------------------------------------------- | ||
# Classes | ||||
#----------------------------------------------------------------------------- | ||||
Paul Ivanov
|
class ExtractOutputPreprocessor(Preprocessor):
    """
    Extracts all of the outputs from the notebook file.  The extracted
    outputs are returned in the 'resources' dictionary.
    """

    # Template used to name each extracted file.  It is formatted with the
    # keyword fields unique_key, cell_index, index and extension.
    output_filename_template = Unicode(
        "{unique_key}_{cell_index}_{index}{extension}", config=True)

    # Output keys (short names or mimetypes) whose data gets extracted.
    extract_output_types = Set({'png', 'jpeg', 'svg', 'application/pdf'}, config=True)

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Extract the outputs of a single cell into the resources dictionary.

        For every output of the cell and every type listed in
        ``extract_output_types`` that the output contains, the data is
        converted to bytes, stored in ``resources['outputs']`` under a
        generated filename, and that filename is recorded on the output
        itself under the key ``<type>_filename``.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        cell_index : int
            Index of the cell being processed (see base.py)

        Returns
        -------
        cell, resources : tuple
            The same cell and resources objects, modified in place.
        """

        # Get the unique key from the resource dict if it exists.  If it does
        # not exist, use 'output' as the default.  Also, get the files
        # directory if it has been specified.
        unique_key = resources.get('unique_key', 'output')
        output_files_dir = resources.get('output_files_dir', None)

        # Make sure the outputs key exists.  Use .get() so that a missing key
        # is treated the same way as a non-dict value instead of raising
        # KeyError.
        if not isinstance(resources.get('outputs'), dict):
            resources['outputs'] = {}

        # Loop through all of the outputs in the cell
        for index, out in enumerate(cell.get('outputs', [])):

            # Get the output in data formats that the template needs extracted
            for out_type in self.extract_output_types:
                if out_type not in out:
                    continue

                data = out[out_type]

                # Binary files are base64-encoded, SVG is already XML
                if out_type in {'png', 'jpeg', 'application/pdf'}:
                    # data is b64-encoded as text (str, unicode), so cast it
                    # to bytes before decoding.  b64decode is used rather
                    # than decodestring because the latter was removed in
                    # Python 3.9; both ignore embedded newlines.
                    data = py3compat.cast_bytes(data)
                    data = base64.b64decode(data)
                elif sys.platform == 'win32':
                    # Text output gets Windows line endings on win32.
                    data = data.replace('\n', '\r\n').encode("UTF-8")
                else:
                    data = data.encode("UTF-8")

                # Build an output name
                # filthy hack while we have some mimetype output, and some not
                if '/' in out_type:
                    ext = guess_extension(out_type)
                    if ext is None:
                        # Unknown mimetype: fall back to its subtype.
                        ext = '.' + out_type.rsplit('/')[-1]
                else:
                    ext = '.' + out_type

                filename = self.output_filename_template.format(
                    unique_key=unique_key,
                    cell_index=cell_index,
                    index=index,
                    extension=ext)

                # On the cell, make the figure available via
                #   cell.outputs[i].svg_filename  ... etc (svg in example)
                # Where
                #   cell.outputs[i].svg  contains the data
                if output_files_dir is not None:
                    filename = os.path.join(output_files_dir, filename)
                out[out_type + '_filename'] = filename

                # In the resources, make the figure available via
                #   resources['outputs']['filename'] = data
                resources['outputs'][filename] = data

        return cell, resources