convert.py
249 lines
| 8.2 KiB
| text/x-python
|
PythonLexer
MinRK
|
r18573 | """Code for converting notebooks to and from v3.""" | ||
MinRK
|
r18568 | |||
MinRK
|
r18573 | # Copyright (c) IPython Development Team. | ||
# Distributed under the terms of the Modified BSD License. | ||||
MinRK
|
r18568 | |||
MinRK
|
r18577 | import json | ||
MinRK
|
r18596 | import re | ||
MinRK
|
r18577 | |||
MinRK
|
r18568 | from .nbbase import ( | ||
MinRK
|
r18573 | nbformat, nbformat_minor, | ||
MinRK
|
r18577 | NotebookNode, | ||
MinRK
|
r18568 | ) | ||
MinRK
|
r18573 | from IPython.nbformat import v3 | ||
MinRK
|
r18578 | from IPython.utils.log import get_logger | ||
MinRK
|
r18568 | |||
MinRK
|
def _warn_if_invalid(nb, version):
    """Validate ``nb`` against format ``version`` and log any failure.

    A ``ValidationError`` raised by the validator is reported through the
    logger returned by ``get_logger()`` at error level and is not re-raised,
    so the caller always continues.
    """
    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import -- confirm before moving it).
    from IPython.nbformat import validate, ValidationError

    try:
        validate(nb, version=version)
    except ValidationError as err:
        # Only look the logger up when there is something to report.
        log = get_logger()
        log.error("Notebook JSON is not valid v%i: %s", version, err)
MinRK
|
r18568 | |||
MinRK
|
def upgrade(nb, from_version=3, from_minor=0):
    """Convert a notebook to v4.

    The notebook is modified in place and the same object is returned.

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.
    from_version : int
        The original version of the notebook to convert.
    from_minor : int
        The original minor version of the notebook to convert (only relevant for v >= 3).

    Returns
    -------
    nb : NotebookNode
        The converted notebook (the object that was passed in).

    Raises
    ------
    ValueError
        If ``from_version`` is neither 3 nor 4.
    """
    if from_version == 4:
        # Already v4: just record the minor version it came from, if different.
        if from_minor != nbformat_minor:
            nb.metadata.orig_nbformat_minor = from_minor
        nb.nbformat_minor = nbformat_minor
        return nb

    if from_version != 3:
        raise ValueError(
            'Cannot convert a notebook directly from v%s to v4.  '
            'Try using the IPython.nbformat.convert module.' % from_version
        )

    # --- v3 -> v4 ---
    # Report (but tolerate) problems in the incoming notebook.
    _warn_if_invalid(nb, from_version)

    # Keep track of the format the notebook originally had, so consumers
    # can tell it has been converted.
    previous_format = nb.pop('orig_nbformat', None)
    nb.metadata.orig_nbformat = previous_format or 3

    # Stamp the new format version.
    nb.nbformat = nbformat
    nb.nbformat_minor = nbformat_minor

    # Worksheets go away in v4; cells from every worksheet are flattened
    # into a single top-level list, each one upgraded on the way.
    upgraded_cells = []
    nb['cells'] = upgraded_cells
    for worksheet in nb.pop('worksheets', []):
        upgraded_cells.extend(upgrade_cell(cell) for cell in worksheet['cells'])

    # The notebook name is dropped from the metadata.
    nb.metadata.pop('name', '')

    # Report (but tolerate) problems in the converted notebook.
    _warn_if_invalid(nb, nbformat)
    return nb
MinRK
|
def upgrade_cell(cell):
    """Upgrade a single cell from v3 to v4, in place.

    heading cell:
        - becomes a markdown cell whose source is a single ``#``-prefixed
          line (``level`` hashes, source lines joined with spaces)

    code cell:
        - remove language metadata
        - move ``collapsed`` into ``cell.metadata``
        - cell.input -> cell.source
        - cell.prompt_number -> cell.execution_count
        - update outputs

    html cell:
        - becomes a markdown cell

    Parameters
    ----------
    cell : NotebookNode
        The v3 cell to convert.

    Returns
    -------
    cell : NotebookNode
        The same cell object, converted.
    """
    cell.setdefault('metadata', NotebookNode())
    if cell.cell_type == 'code':
        cell.pop('language', '')
        if 'collapsed' in cell:
            cell.metadata['collapsed'] = cell.pop('collapsed')
        cell.source = cell.pop('input', '')
        cell.execution_count = cell.pop('prompt_number', None)
        cell.outputs = upgrade_outputs(cell.outputs)
    elif cell.cell_type == 'heading':
        cell.cell_type = 'markdown'
        level = cell.pop('level', 1)
        # The template must be a unicode literal: on Python 2 a byte-string
        # template would try to ASCII-encode the heading text and raise
        # UnicodeEncodeError for any non-ASCII title.
        cell.source = u'{hashes} {single_line}'.format(
            hashes='#' * level,
            single_line=' '.join(cell.get('source', '').splitlines()),
        )
    elif cell.cell_type == 'html':
        # Technically, this exists. It will never happen in practice.
        cell.cell_type = 'markdown'
    return cell
def downgrade_cell(cell):
    """Convert one v4 cell to its v3 form, in place, and return it.

    code cell:
        - cell.language is set to 'python'
        - cell.source -> cell.input
        - cell.execution_count -> cell.prompt_number
        - ``collapsed`` moves from cell.metadata to the cell (default False)
        - outputs are downgraded

    markdown cell:
        - a single-line source starting with '#' becomes a heading cell
          whose level is the number of leading hashes

    Any other cell is returned unchanged.
    """
    kind = cell.cell_type

    if kind == 'code':
        cell.language = 'python'
        cell.input = cell.pop('source', '')
        cell.prompt_number = cell.pop('execution_count', None)
        cell.collapsed = cell.metadata.pop('collapsed', False)
        cell.outputs = downgrade_outputs(cell.outputs)
        return cell

    if kind == 'markdown':
        source = cell.get('source', '')
        single_line_heading = '\n' not in source and source.startswith('#')
        if single_line_heading:
            # The source starts with '#', so the pattern always matches.
            match = re.match(r'(#+)\s*(.*)', source)
            hashes, title = match.groups()
            cell.cell_type = 'heading'
            cell.source = title
            cell.level = len(hashes)

    return cell
# Short output keys used by v3, paired with the mime-type keys used by v4.
_mime_map = {
    "text": "text/plain",
    "html": "text/html",
    "svg": "image/svg+xml",
    "png": "image/png",
    "jpeg": "image/jpeg",
    "latex": "text/latex",
    "json": "application/json",
    "javascript": "application/javascript",
}


def to_mime_key(d):
    """Rename the v3 short keys of ``d`` to mime-type keys.

    ``d`` is modified in place and returned; keys that are not v3
    aliases are left untouched.
    """
    for short_name, mime_type in _mime_map.items():
        if short_name not in d:
            continue
        d[mime_type] = d.pop(short_name)
    return d


def from_mime_key(d):
    """Rename the mime-type keys of ``d`` to their v3 short keys.

    ``d`` is modified in place and returned; keys that are not known
    mime-types are left untouched.
    """
    for short_name, mime_type in _mime_map.items():
        if mime_type not in d:
            continue
        d[short_name] = d.pop(mime_type)
    return d
def upgrade_output(output):
    """Upgrade a single code cell output from v3 to v4, in place.

    - pyout -> execute_result (prompt_number -> execution_count)
    - pyerr -> error
    - output.type -> output.data.mime/type
    - mime-type keys (in both the data and the metadata)
    - stream.stream -> stream.name

    Parameters
    ----------
    output : dict-like
        The v3 output to convert.

    Returns
    -------
    output : dict-like
        The same output object, converted.
    """
    if output['output_type'] in {'pyout', 'display_data'}:
        output.setdefault('metadata', NotebookNode())
        if output['output_type'] == 'pyout':
            output['output_type'] = 'execute_result'
            output['execution_count'] = output.pop('prompt_number', None)

        # move output data into data sub-dict
        data = {}
        # iterate over a snapshot of the keys: the output is mutated below
        for key in list(output):
            if key in {'output_type', 'execution_count', 'metadata'}:
                continue
            data[key] = output.pop(key)
        to_mime_key(data)
        output['data'] = data
        to_mime_key(output['metadata'])
        # the JSON payload is decoded here (downgrade_output dumps it back
        # to a string)
        if 'application/json' in data:
            data['application/json'] = json.loads(data['application/json'])
        # promote ascii bytes (from v2) to unicode
        for key in ('image/png', 'image/jpeg'):
            if key in data and isinstance(data[key], bytes):
                data[key] = data[key].decode('ascii')
    elif output['output_type'] == 'pyerr':
        output['output_type'] = 'error'
    elif output['output_type'] == 'stream':
        # Fall back to 'stdout' when the field is absent, so one incomplete
        # stream output does not abort the whole conversion with a KeyError.
        output['name'] = output.pop('stream', 'stdout')
    return output
def downgrade_output(output):
    """Downgrade a single code cell output to v3 from v4, in place.

    - pyout <- execute_result (prompt_number <- execution_count)
    - pyerr <- error
    - output.data.mime/type -> output.type
    - un-mime-type keys (in both the data and the metadata)
    - stream.stream <- stream.name

    Parameters
    ----------
    output : dict-like
        The v4 output to convert.

    Returns
    -------
    output : dict-like
        The same output object, converted.
    """
    if output['output_type'] in {'execute_result', 'display_data'}:
        if output['output_type'] == 'execute_result':
            output['output_type'] = 'pyout'
            output['prompt_number'] = output.pop('execution_count', None)

        # promote data dict to top-level output namespace
        data = output.pop('data', {})
        # the JSON payload is serialised back to a string for v3
        if 'application/json' in data:
            data['application/json'] = json.dumps(data['application/json'])
        from_mime_key(data)
        output.update(data)
        from_mime_key(output.get('metadata', {}))
    elif output['output_type'] == 'error':
        output['output_type'] = 'pyerr'
    elif output['output_type'] == 'stream':
        # Fall back to 'stdout' when the name is absent, so one incomplete
        # stream output does not abort the whole conversion with a KeyError.
        output['stream'] = output.pop('name', 'stdout')
    return output
def upgrade_outputs(outputs):
    """Upgrade every output of a code cell from v3 to v4.

    Returns a new list holding the result of ``upgrade_output`` for each
    item of ``outputs``, in order.
    """
    return [upgrade_output(single_output) for single_output in outputs]
def downgrade_outputs(outputs):
    """Downgrade every output of a code cell to v3 from v4.

    Returns a new list holding the result of ``downgrade_output`` for each
    item of ``outputs``, in order.
    """
    return [downgrade_output(single_output) for single_output in outputs]
MinRK
|
r18568 | |||
def downgrade(nb):
    """Convert a v4 notebook to v3.

    The notebook is modified in place and the same object is returned.
    A notebook whose ``nbformat`` is not this module's ``nbformat`` is
    returned untouched.

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.

    Returns
    -------
    nb : NotebookNode
        The converted notebook (the object that was passed in).
    """
    if nb.nbformat != nbformat:
        # Not the format this module handles: nothing to do.
        return nb

    # Report (but tolerate) problems in the incoming notebook.
    _warn_if_invalid(nb, nbformat)

    # Stamp the v3 format version.
    nb.nbformat = v3.nbformat
    nb.nbformat_minor = v3.nbformat_minor

    # v3 keeps its cells inside worksheets: wrap all of them in a single one.
    v3_cells = [downgrade_cell(v4_cell) for v4_cell in nb.pop('cells')]
    nb.worksheets = [v3.new_worksheet(cells=v3_cells)]

    # Make sure a name entry exists and drop the conversion markers.
    nb.metadata.setdefault('name', '')
    for marker in ('orig_nbformat', 'orig_nbformat_minor'):
        nb.metadata.pop(marker, None)

    # Report (but tolerate) problems in the converted notebook.
    _warn_if_invalid(nb, v3.nbformat)
    return nb