##// END OF EJS Templates
split likely multiline strings when writing to/from JSON
MinRK -
Show More
@@ -118,7 +118,12 b' class NotebookManager(LoggingConfigurable):'
118 if format not in self.allowed_formats:
118 if format not in self.allowed_formats:
119 raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
119 raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
120 last_modified, nb = self.get_notebook_object(notebook_id)
120 last_modified, nb = self.get_notebook_object(notebook_id)
121 data = current.writes(nb, format)
121 kwargs = {}
122 if format == 'json':
123 # don't split lines for sending over the wire, because it
124 # should match the Python in-memory format.
125 kwargs['split_lines'] = False
126 data = current.writes(nb, format, **kwargs)
122 name = nb.get('name','notebook')
127 name = nb.get('name','notebook')
123 return last_modified, name, data
128 return last_modified, name, data
124
129
@@ -16,10 +16,14 b' Authors:'
16 # Imports
16 # Imports
17 #-----------------------------------------------------------------------------
17 #-----------------------------------------------------------------------------
18
18
19 from .nbbase import from_dict
19 import copy
20 from .rwbase import NotebookReader, NotebookWriter, restore_bytes
21 import json
20 import json
22
21
22 from .nbbase import from_dict
23 from .rwbase import (
24 NotebookReader, NotebookWriter, restore_bytes, rejoin_lines, split_lines
25 )
26
23 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
24 # Code
28 # Code
25 #-----------------------------------------------------------------------------
29 #-----------------------------------------------------------------------------
@@ -40,7 +44,7 b' class JSONReader(NotebookReader):'
40 return nb
44 return nb
41
45
42 def to_notebook(self, d, **kwargs):
46 def to_notebook(self, d, **kwargs):
43 return restore_bytes(from_dict(d))
47 return restore_bytes(rejoin_lines(from_dict(d)))
44
48
45
49
46 class JSONWriter(NotebookWriter):
50 class JSONWriter(NotebookWriter):
@@ -49,8 +53,10 b' class JSONWriter(NotebookWriter):'
49 kwargs['cls'] = BytesEncoder
53 kwargs['cls'] = BytesEncoder
50 kwargs['indent'] = 4
54 kwargs['indent'] = 4
51 kwargs['sort_keys'] = True
55 kwargs['sort_keys'] = True
56 if kwargs.pop('split_lines', True):
57 nb = split_lines(copy.deepcopy(nb))
52 return json.dumps(nb, **kwargs)
58 return json.dumps(nb, **kwargs)
53
59
54
60
55 _reader = JSONReader()
61 _reader = JSONReader()
56 _writer = JSONWriter()
62 _writer = JSONWriter()
@@ -41,6 +41,61 b' def restore_bytes(nb):'
41 output.jpeg = str_to_bytes(output.jpeg, 'ascii')
41 output.jpeg = str_to_bytes(output.jpeg, 'ascii')
42 return nb
42 return nb
43
43
# output keys that are likely to have multiline values
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']

# keys of non-code (text) cells that are likely to have multiline values
_multiline_text_keys = ['source', 'rendered']

try:
    _string_types = basestring  # Python 2: matches both str and unicode
except NameError:
    _string_types = str  # Python 3: basestring no longer exists


def _split_keepends(text):
    """Split ``text`` into lines, keeping each line's terminator.

    Keeping the terminators is what makes the split reversible: a trailing
    newline and ``\\r\\n`` endings survive a split/rejoin round trip.
    """
    return text.splitlines(True)


def _join_lines(lines):
    """Join a list of lines back into a single string.

    Two on-disk layouts are accepted:

    * lines that still carry their terminators (written by the current
      ``split_lines``) are concatenated as-is;
    * lines without terminators (written by the earlier ``splitlines()``
      based implementation) are joined with ``'\\n'``.

    Only the first line is inspected: with kept terminators, every line
    except possibly the last one ends with a terminator, so the first line
    is enough to tell the two layouts apart.
    """
    first = lines[0] if lines else u''
    pieces = first.splitlines()
    # A line produced with kept terminators is exactly one logical line that
    # shrinks when its terminator is stripped.
    if len(pieces) == 1 and pieces[0] != first:
        return u''.join(lines)
    return u'\n'.join(lines)


def _convert_fields(mapping, keys, from_type, convert):
    """Replace ``mapping[key]`` by ``convert(value)`` for values of ``from_type``.

    Keys that are missing, or whose value is not an instance of
    ``from_type``, are left untouched.
    """
    for key in keys:
        item = mapping.get(key, None)
        if isinstance(item, from_type):
            mapping[key] = convert(item)


def _convert_multiline(nb, from_type, convert):
    """Apply ``convert`` in place to every likely-multiline field of ``nb``.

    Code cells have their ``input`` and the ``_multiline_outputs`` keys of
    each output converted; every other cell has its ``source`` and
    ``rendered`` keys converted. Only values of ``from_type`` are touched.
    Returns ``nb`` itself.
    """
    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type == 'code':
                if 'input' in cell and isinstance(cell.input, from_type):
                    cell.input = convert(cell.input)
                for output in cell.outputs:
                    _convert_fields(output, _multiline_outputs,
                                    from_type, convert)
            else:  # text cell
                _convert_fields(cell, _multiline_text_keys,
                                from_type, convert)
    return nb


def rejoin_lines(nb):
    """rejoin multiline text into strings

    For reversing effects of ``split_lines(nb)``.

    This only rejoins lines that have been split, so if text objects were not
    split they will pass through unchanged. Lists written by the earlier
    implementation (lines without terminators) are still joined with
    newlines, so existing files stay readable.

    Used when reading JSON files that may have been passed through
    split_lines. The notebook is modified in place and returned.
    """
    return _convert_multiline(nb, list, _join_lines)


def split_lines(nb):
    """split likely multiline text into lists of strings

    For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will
    reverse the effects of ``split_lines(nb)``.

    Line terminators are kept on each line so that the round trip is exact,
    including trailing newlines and ``\\r\\n`` line endings.

    Used when writing JSON files. The notebook is modified in place and
    returned; pass a copy if the original must stay untouched.
    """
    return _convert_multiline(nb, _string_types, _split_keepends)
44
99
45 # b64 encode/decode are never actually used, because all bytes objects in
100 # b64 encode/decode are never actually used, because all bytes objects in
46 # the notebook are already b64-encoded, and we don't need/want to double-encode
101 # the notebook are already b64-encoded, and we don't need/want to double-encode
@@ -16,6 +16,19 b' class TestJSON(TestCase):'
16 # print
16 # print
17 # print s
17 # print s
18 self.assertEquals(reads(s),nb0)
18 self.assertEquals(reads(s),nb0)
19
def test_roundtrip_nosplit(self):
    """Ensure that multiline blobs are still readable"""
    # Notebooks written before the split_lines change store multiline
    # values as single strings; writing with split_lines=False reproduces
    # that layout, and reading it back must give the original notebook.
    s = writes(nb0, split_lines=False)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(reads(s), nb0)
26
def test_roundtrip_split(self):
    """Ensure that splitting multiline blocks is safe"""
    # This won't differ from test_roundtrip unless the default changes:
    # split_lines=True is requested explicitly so the split path stays
    # covered either way.
    s = writes(nb0, split_lines=True)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(reads(s), nb0)
19
32
20
33
21
34
General Comments 0
You need to be logged in to leave comments. Login now