extractoutput.py
98 lines
| 4.0 KiB
| text/x-python
|
PythonLexer
MinRK
|
r18580 | """A preprocessor that extracts all of the outputs from the | ||
Jonathan Frederic
|
r11634 | notebook file. The extracted outputs are returned in the 'resources' dictionary. | ||
Jonathan Frederic
|
r10674 | """ | ||
MinRK
|
r18580 | # Copyright (c) IPython Development Team. | ||
# Distributed under the terms of the Modified BSD License. | ||||
Jonathan Frederic
|
r10674 | |||
MinRK
|
r11549 | import base64 | ||
Jonathan Frederic
|
r11370 | import sys | ||
Jonathan Frederic
|
r11631 | import os | ||
MinRK
|
r15385 | from mimetypes import guess_extension | ||
MinRK
|
r11549 | |||
Thomas Kluyver
|
r13921 | from IPython.utils.traitlets import Unicode, Set | ||
Paul Ivanov
|
r12219 | from .base import Preprocessor | ||
Jonathan Frederic
|
r11547 | from IPython.utils import py3compat | ||
Jonathan Frederic
|
r10437 | |||
Jonathan Frederic
|
r10674 | |||
Paul Ivanov
|
class ExtractOutputPreprocessor(Preprocessor):
    """
    Extracts all of the outputs from the notebook file.  The extracted
    outputs are returned in the 'resources' dictionary.

    For every ``display_data`` / ``execute_result`` output, each MIME type
    listed in ``extract_output_types`` that is present in the output's data
    is converted to bytes and stored in ``resources['outputs']`` under a
    filename generated from ``output_filename_template``.  The same filename
    is recorded on the output itself in ``metadata['filenames'][mime_type]``.
    """

    # Template for the generated filenames.  It is formatted with the
    # keyword arguments unique_key, cell_index, index and extension.
    output_filename_template = Unicode(
        "{unique_key}_{cell_index}_{index}{extension}", config=True)

    # MIME types that are pulled out of the cell outputs.
    extract_output_types = Set(
        {'image/png', 'image/jpeg', 'image/svg+xml', 'application/pdf'},
        config=True)

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Extract the configured output types from a single cell.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.  The
            optional keys 'unique_key' and 'output_files_dir' are read;
            the key 'outputs' is created if needed and filled with
            ``{filename: bytes}`` entries.
        cell_index : int
            Index of the cell being processed (see base.py)

        Returns
        -------
        (cell, resources) : tuple
            The same objects that were passed in, updated in place.
        """

        # Get the unique key from the resource dict if it exists.  If it does
        # not exist, use 'output' as the default.  Also, get the files
        # directory if it has been specified.
        unique_key = resources.get('unique_key', 'output')
        output_files_dir = resources.get('output_files_dir', None)

        # Make sure the outputs key exists.  ``get`` is used rather than
        # indexing so that a plain dict without an 'outputs' entry is
        # initialised instead of raising KeyError.
        if not isinstance(resources.get('outputs'), dict):
            resources['outputs'] = {}

        # Loop through all of the outputs in the cell
        for index, out in enumerate(cell.get('outputs', [])):
            if out.output_type not in {'display_data', 'execute_result'}:
                continue

            # Get the output in data formats that the template needs extracted
            for mime_type in self.extract_output_types:
                if mime_type not in out.data:
                    continue
                data = out.data[mime_type]

                # Binary files are base64-encoded, SVG is already XML.
                # Only the MIME types listed here are treated as base64
                # payloads; every other extracted type is written out as
                # UTF-8 encoded text.
                if mime_type in {'image/png', 'image/jpeg', 'application/pdf'}:
                    # data is b64-encoded as text (str, unicode); decode it
                    # from bytes.  b64decode is used because the
                    # base64.decodestring alias was removed in Python 3.9.
                    data = py3compat.cast_bytes(data)
                    data = base64.b64decode(data)
                elif sys.platform == 'win32':
                    data = data.replace('\n', '\r\n').encode("UTF-8")
                else:
                    data = data.encode("UTF-8")

                # Fall back to the MIME subtype when the mimetypes module
                # does not know an extension for this type.
                ext = guess_extension(mime_type)
                if ext is None:
                    ext = '.' + mime_type.rsplit('/')[-1]

                filename = self.output_filename_template.format(
                    unique_key=unique_key,
                    cell_index=cell_index,
                    index=index,
                    extension=ext)

                # On the cell, make the figure available via
                #   cell.outputs[i].metadata.filenames['mime/type']
                # where
                #   cell.outputs[i].data['mime/type'] contains the data
                if output_files_dir is not None:
                    filename = os.path.join(output_files_dir, filename)
                out.metadata.setdefault('filenames', {})
                out.metadata['filenames'][mime_type] = filename

                # In the resources, make the figure available via
                #   resources['outputs']['filename'] = data
                resources['outputs'][filename] = data

        return cell, resources