rwbase.py
190 lines
| 6.6 KiB
| text/x-python
|
PythonLexer
MinRK
|
r18257 | """Base classes and utilities for readers and writers.""" | ||
Brian E. Granger
|
r4609 | |||
MinRK
|
r18257 | # Copyright (c) IPython Development Team. | ||
# Distributed under the terms of the Modified BSD License. | ||||
Brian E. Granger
|
r4609 | |||
Brian E. Granger
|
r4392 | from base64 import encodestring, decodestring | ||
MinRK
|
r6210 | from IPython.utils import py3compat | ||
Thomas Kluyver
|
r13353 | from IPython.utils.py3compat import str_to_bytes, unicode_type, string_types | ||
MinRK
|
r5175 | |||
Brian E. Granger
|
r4609 | |||
MinRK
|
def restore_bytes(nb):
    """Restore bytes of image data from unicode-only formats.

    Base64 encoding is handled elsewhere.  Bytes objects in the notebook are
    always b64-encoded. We DO NOT encode/decode around file formats.

    Note: this is never used

    For every output of every code cell, the ``png`` and ``jpeg`` values
    (when present) are replaced by ``str_to_bytes(value, 'ascii')``.

    Parameters
    ----------
    nb : notebook object exposing ``worksheets`` -> ``cells`` -> ``outputs``
        by attribute access; it is modified in place.

    Returns
    -------
    The same ``nb`` object that was passed in.
    """
    for ws in nb.worksheets:
        for cell in ws.cells:
            # Only code cells carry outputs.
            if cell.cell_type != 'code':
                continue
            for output in cell.outputs:
                if 'png' in output:
                    output.png = str_to_bytes(output.png, 'ascii')
                if 'jpeg' in output:
                    output.jpeg = str_to_bytes(output.jpeg, 'ascii')
    return nb
MinRK
|
r5278 | # output keys that are likely to have multiline values | ||
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json'] | ||||
MinRK
|
r7335 | |||
# FIXME: workaround for old splitlines() | ||||
def _join_lines(lines): | ||||
"""join lines that have been written by splitlines() | ||||
Has logic to protect against `splitlines()`, which | ||||
should have been `splitlines(True)` | ||||
""" | ||||
if lines and lines[0].endswith(('\n', '\r')): | ||||
# created by splitlines(True) | ||||
return u''.join(lines) | ||||
else: | ||||
# created by splitlines() | ||||
return u'\n'.join(lines) | ||||
MinRK
|
def rejoin_lines(nb):
    """rejoin multiline text into strings

    For reversing effects of ``split_lines(nb)``.

    This only rejoins lines that have been split, so if text objects were not split
    they will pass through unchanged.

    Used when reading JSON files that may have been passed through split_lines.

    Fields handled: ``input`` and every key of ``_multiline_outputs`` on each
    output for code cells; ``source`` and ``rendered`` for all other cells.
    Only values that are lists are rejoined (via ``_join_lines``).

    The notebook is modified in place and returned.
    """
    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type == 'code':
                if 'input' in cell and isinstance(cell.input, list):
                    cell.input = _join_lines(cell.input)
                for output in cell.outputs:
                    for key in _multiline_outputs:
                        item = output.get(key, None)
                        if isinstance(item, list):
                            output[key] = _join_lines(item)
            else:  # text, heading cell
                for key in ['source', 'rendered']:
                    item = cell.get(key, None)
                    if isinstance(item, list):
                        cell[key] = _join_lines(item)
    return nb
def split_lines(nb):
    """split likely multiline text into lists of strings

    For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will
    reverse the effects of ``split_lines(nb)``.

    Used when writing JSON files.

    Fields handled: ``input`` and every key of ``_multiline_outputs`` on each
    output for code cells; ``source`` and ``rendered`` for all other cells.
    Only string values are split, using ``splitlines(True)`` so that each
    line keeps its terminator.

    The notebook is modified in place and returned.
    """
    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type == 'code':
                if 'input' in cell and isinstance(cell.input, string_types):
                    cell.input = cell.input.splitlines(True)
                for output in cell.outputs:
                    for key in _multiline_outputs:
                        item = output.get(key, None)
                        if isinstance(item, string_types):
                            output[key] = item.splitlines(True)
            else:  # text, heading cell
                for key in ['source', 'rendered']:
                    item = cell.get(key, None)
                    if isinstance(item, string_types):
                        cell[key] = item.splitlines(True)
    return nb
MinRK
|
r5175 | |||
# b64 encode/decode are never actually used, because all bytes objects in
# the notebook are already b64-encoded, and we don't need/want to double-encode

def base64_decode(nb):
    """Restore all bytes objects in the notebook from base64-encoded strings.

    Note: This is never used

    For every output of every code cell, the ``png`` and ``jpeg`` values
    (when present) are encoded to ASCII bytes if they are unicode text and
    then base64-decoded.  The notebook is modified in place and returned.
    """
    # base64.decodestring was removed in Python 3.9; decodebytes is the
    # replacement.  Fall back to the old name where decodebytes is missing.
    try:
        from base64 import decodebytes
    except ImportError:
        from base64 import decodestring as decodebytes

    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type != 'code':
                continue
            for output in cell.outputs:
                if 'png' in output:
                    if isinstance(output.png, unicode_type):
                        output.png = output.png.encode('ascii')
                    output.png = decodebytes(output.png)
                if 'jpeg' in output:
                    if isinstance(output.jpeg, unicode_type):
                        output.jpeg = output.jpeg.encode('ascii')
                    output.jpeg = decodebytes(output.jpeg)
    return nb
Brian E. Granger
|
def base64_encode(nb):
    """Base64 encode all bytes objects in the notebook.

    These will be b64-encoded unicode strings

    Note: This is never used

    For every output of every code cell, the ``png`` and ``jpeg`` values
    (when present) are base64-encoded and the result decoded as ASCII text.
    The notebook is modified in place and returned.
    """
    # base64.encodestring was removed in Python 3.9; encodebytes is the
    # replacement.  Fall back to the old name where encodebytes is missing.
    try:
        from base64 import encodebytes
    except ImportError:
        from base64 import encodestring as encodebytes

    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type != 'code':
                continue
            for output in cell.outputs:
                if 'png' in output:
                    output.png = encodebytes(output.png).decode('ascii')
                if 'jpeg' in output:
                    output.jpeg = encodebytes(output.jpeg).decode('ascii')
    return nb
MinRK
|
def strip_transient(nb):
    """Strip transient values that shouldn't be stored in files.

    This should be called in *both* read and write.

    Removes ``orig_nbformat`` and ``orig_nbformat_minor`` from the notebook,
    and ``trusted`` from every cell and from every cell's ``metadata``.
    Missing keys are ignored.  The notebook is modified in place and returned.
    """
    for key in ('orig_nbformat', 'orig_nbformat_minor'):
        nb.pop(key, None)
    for ws in nb['worksheets']:
        for cell in ws['cells']:
            # A cell without metadata gets a throwaway dict, so nothing is added.
            cell.get('metadata', {}).pop('trusted', None)
            # strip cell.trusted even though it shouldn't be used,
            # since it's where the transient value used to be stored.
            cell.pop('trusted', None)
    return nb
Brian E. Granger
|
class NotebookReader(object):
    """A class for reading notebooks.

    Subclasses implement ``reads``; ``read`` is built on top of it.
    """

    def reads(self, s, **kwargs):
        """Read a notebook from a string.

        Raises NotImplementedError here; must be overridden by subclasses.
        """
        raise NotImplementedError("reads must be implemented in a subclass")

    def read(self, fp, **kwargs):
        """Read a notebook from a file like object

        The whole content of ``fp`` is read and handed to ``reads`` together
        with any keyword arguments; the result of ``reads`` is returned.
        """
        nbs = fp.read()
        # On Python 2, non-unicode content is converted to unicode first.
        if not py3compat.PY3 and not isinstance(nbs, unicode_type):
            nbs = py3compat.str_to_unicode(nbs)
        return self.reads(nbs, **kwargs)
Brian E. Granger
|
r4392 | |||
class NotebookWriter(object):
    """A class for writing notebooks.

    Subclasses implement ``writes``; ``write`` is built on top of it.
    """

    def writes(self, nb, **kwargs):
        """Write a notebook to a string.

        Raises NotImplementedError here; must be overridden by subclasses.
        """
        raise NotImplementedError("writes must be implemented in a subclass")

    def write(self, nb, fp, **kwargs):
        """Write a notebook to a file like object

        The notebook is serialized with ``writes`` (keyword arguments are
        passed through) and the text is written to ``fp``; the return value
        of ``fp.write`` is returned.
        """
        nbs = self.writes(nb, **kwargs)
        if not py3compat.PY3 and not isinstance(nbs, unicode_type):
            # this branch is likely only taken for JSON on Python 2
            nbs = py3compat.str_to_unicode(nbs)
        return fp.write(nbs)
Brian E. Granger
|
r4392 | |||
Brian E. Granger
|
r4401 | |||