From 5d9a42c2c858ba111e076e01820587a1900aef7e 2011-11-09 20:10:03
From: MinRK
Date: 2011-11-09 20:10:03
Subject: [PATCH] split likely multiline strings when writing to/from JSON

---

Review note: lines are split with their line endings kept, so trailing
newlines survive a write/read round trip.  Lists that were written without
line endings are still joined with newlines when read.

diff --git a/IPython/frontend/html/notebook/notebookmanager.py b/IPython/frontend/html/notebook/notebookmanager.py
index 6aed5a4..89c0c40 100644
--- a/IPython/frontend/html/notebook/notebookmanager.py
+++ b/IPython/frontend/html/notebook/notebookmanager.py
@@ -118,7 +118,12 @@ class NotebookManager(LoggingConfigurable):
         if format not in self.allowed_formats:
             raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
         last_modified, nb = self.get_notebook_object(notebook_id)
-        data = current.writes(nb, format)
+        kwargs = {}
+        if format == 'json':
+            # don't split lines for sending over the wire, because it
+            # should match the Python in-memory format.
+            kwargs['split_lines'] = False
+        data = current.writes(nb, format, **kwargs)
         name = nb.get('name','notebook')
         return last_modified, name, data
 
diff --git a/IPython/nbformat/v2/nbjson.py b/IPython/nbformat/v2/nbjson.py
index e67bec3..e207466 100644
--- a/IPython/nbformat/v2/nbjson.py
+++ b/IPython/nbformat/v2/nbjson.py
@@ -16,10 +16,14 @@ Authors:
 # Imports
 #-----------------------------------------------------------------------------
 
-from .nbbase import from_dict
-from .rwbase import NotebookReader, NotebookWriter, restore_bytes
+import copy
 import json
 
+from .nbbase import from_dict
+from .rwbase import (
+    NotebookReader, NotebookWriter, restore_bytes, rejoin_lines, split_lines
+)
+
 #-----------------------------------------------------------------------------
 # Code
 #-----------------------------------------------------------------------------
@@ -40,7 +44,7 @@ class JSONReader(NotebookReader):
         return nb
 
     def to_notebook(self, d, **kwargs):
-        return restore_bytes(from_dict(d))
+        return restore_bytes(rejoin_lines(from_dict(d)))
 
 
 class JSONWriter(NotebookWriter):
@@ -49,8 +53,10 @@ class JSONWriter(NotebookWriter):
         kwargs['cls'] = BytesEncoder
         kwargs['indent'] = 4
         kwargs['sort_keys'] = True
+        if kwargs.pop('split_lines', True):
+            nb = split_lines(copy.deepcopy(nb))
         return json.dumps(nb, **kwargs)
-    
+
 
 _reader = JSONReader()
 _writer = JSONWriter()
diff --git a/IPython/nbformat/v2/rwbase.py b/IPython/nbformat/v2/rwbase.py
index cbcb50a..39d54b2 100644
--- a/IPython/nbformat/v2/rwbase.py
+++ b/IPython/nbformat/v2/rwbase.py
@@ -41,6 +41,79 @@ def restore_bytes(nb):
                         output.jpeg = str_to_bytes(output.jpeg, 'ascii')
     return nb
 
+# output keys that are likely to have multiline values
+_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
+
+
+def _join_lines(lines):
+    """join a list of lines back into a single string
+
+    Lists produced with ``splitlines(True)`` still carry their line endings
+    and are simply concatenated.  Lists produced with a bare ``splitlines()``
+    (as in notebook files written before line endings were kept) carry none
+    and are joined with newlines.
+    """
+    # only the keepends form leaves a line ending on the first line
+    if lines and lines[0].splitlines(True) != lines[0].splitlines():
+        return u''.join(lines)
+    return u'\n'.join(lines)
+
+
+def rejoin_lines(nb):
+    """rejoin multiline text into strings
+
+    For reversing effects of ``split_lines(nb)``.
+
+    This only rejoins lines that have been split, so if text objects were not split
+    they will pass through unchanged.
+
+    Used when reading JSON files that may have been passed through split_lines.
+    """
+    for ws in nb.worksheets:
+        for cell in ws.cells:
+            if cell.cell_type == 'code':
+                if 'input' in cell and isinstance(cell.input, list):
+                    cell.input = _join_lines(cell.input)
+                for output in cell.outputs:
+                    for key in _multiline_outputs:
+                        item = output.get(key, None)
+                        if isinstance(item, list):
+                            output[key] = _join_lines(item)
+            else: # text cell
+                for key in ['source', 'rendered']:
+                    item = cell.get(key, None)
+                    if isinstance(item, list):
+                        cell[key] = _join_lines(item)
+    return nb
+
+
+def split_lines(nb):
+    """split likely multiline text into lists of strings
+
+    For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will
+    reverse the effects of ``split_lines(nb)``.
+
+    Line endings are kept on each line, so that trailing newlines and CRLF
+    line endings survive the round trip.
+
+    Used when writing JSON files.
+    """
+    for ws in nb.worksheets:
+        for cell in ws.cells:
+            if cell.cell_type == 'code':
+                if 'input' in cell and isinstance(cell.input, basestring):
+                    cell.input = cell.input.splitlines(True)
+                for output in cell.outputs:
+                    for key in _multiline_outputs:
+                        item = output.get(key, None)
+                        if isinstance(item, basestring):
+                            output[key] = item.splitlines(True)
+            else: # text cell
+                for key in ['source', 'rendered']:
+                    item = cell.get(key, None)
+                    if isinstance(item, basestring):
+                        cell[key] = item.splitlines(True)
+    return nb
 
 # b64 encode/decode are never actually used, because all bytes objects in
 # the notebook are already b64-encoded, and we don't need/want to double-encode
diff --git a/IPython/nbformat/v2/tests/test_json.py b/IPython/nbformat/v2/tests/test_json.py
index 1d05fa0..d6cc5d3 100644
--- a/IPython/nbformat/v2/tests/test_json.py
+++ b/IPython/nbformat/v2/tests/test_json.py
@@ -16,6 +16,19 @@ class TestJSON(TestCase):
 #        print
 #        print s
         self.assertEquals(reads(s),nb0)
+
+    def test_roundtrip_nosplit(self):
+        """Ensure that multiline blobs are still readable"""
+        # ensures that notebooks written prior to splitlines change
+        # are still readable.
+        s = writes(nb0, split_lines=False)
+        self.assertEquals(reads(s),nb0)
+
+    def test_roundtrip_split(self):
+        """Ensure that splitting multiline blocks is safe"""
+        # This won't differ from test_roundtrip unless the default changes
+        s = writes(nb0, split_lines=True)
+        self.assertEquals(reads(s),nb0)
 
 
 