upstream/ipython Commit - r5278:5d9a42c2

split likely multiline strings when writing to/from JSON

MinRK -

r5278:5d9a42c2

parent child

IPython/frontend/html/notebook/notebookmanager.py

0 +6 -1

                      if format not in self.allowed_formats:
                          raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
                      last_modified, nb = self.get_notebook_object(notebook_id)
-                     data = current.writes(nb, format)
+                     kwargs = {}
+                     if format == 'json':
+                         # don't split lines for sending over the wire, because it
+                         # should match the Python in-memory format.
+                         kwargs['split_lines'] = False
+                     data = current.writes(nb, format, **kwargs)
                      name = nb.get('name','notebook')
                      return last_modified, name, data

IPython/nbformat/v2/nbjson.py

0 +10 -4

              # Imports
              #-----------------------------------------------------------------------------
-             from .nbbase import from_dict
-             from .rwbase import NotebookReader, NotebookWriter, restore_bytes
+             import copy
              import json
+             from .nbbase import from_dict
+             from .rwbase import (
+                 NotebookReader, NotebookWriter, restore_bytes, rejoin_lines, split_lines
+             )
              #-----------------------------------------------------------------------------
              # Code
              #-----------------------------------------------------------------------------
                      return nb
                  def to_notebook(self, d, **kwargs):
-                     return restore_bytes(from_dict(d))
+                     return restore_bytes(rejoin_lines(from_dict(d)))
              class JSONWriter(NotebookWriter):
                      kwargs['cls'] = BytesEncoder
                      kwargs['indent'] = 4
                      kwargs['sort_keys'] = True
+                     if kwargs.pop('split_lines', True):
+                         nb = split_lines(copy.deepcopy(nb))
                      return json.dumps(nb, **kwargs)
              _reader = JSONReader()
              _writer = JSONWriter()

IPython/nbformat/v2/rwbase.py

0 +55 0

		@@ -41,6 +41,61 b' def restore_bytes(nb):'
41	41	output.jpeg = str_to_bytes(output.jpeg, 'ascii')
42	42	return nb
43	43
	44	# output keys that are likely to have multiline values
	45	_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
	46
	47	def rejoin_lines(nb):
	48	"""rejoin multiline text into strings
	49
	50	For reversing effects of ``split_lines(nb)``.
	51
	52	This only rejoins lines that have been split, so if text objects were not split
	53	they will pass through unchanged.
	54
	55	Used when reading JSON files that may have been passed through split_lines.
	56	"""
	57	for ws in nb.worksheets:
	58	for cell in ws.cells:
	59	if cell.cell_type == 'code':
	60	if 'input' in cell and isinstance(cell.input, list):
	61	cell.input = u'\n'.join(cell.input)
	62	for output in cell.outputs:
	63	for key in _multiline_outputs:
	64	item = output.get(key, None)
	65	if isinstance(item, list):
	66	output[key] = u'\n'.join(item)
	67	else: # text cell
	68	for key in ['source', 'rendered']:
	69	item = cell.get(key, None)
	70	if isinstance(item, list):
	71	cell[key] = u'\n'.join(item)
	72	return nb
	73
	74
	75	def split_lines(nb):
	76	"""split likely multiline text into lists of strings
	77
	78	For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will
	79	reverse the effects of ``split_lines(nb)``.
	80
	81	Used when writing JSON files.
	82	"""
	83	for ws in nb.worksheets:
	84	for cell in ws.cells:
	85	if cell.cell_type == 'code':
	86	if 'input' in cell and isinstance(cell.input, basestring):
	87	cell.input = cell.input.splitlines()
	88	for output in cell.outputs:
	89	for key in _multiline_outputs:
	90	item = output.get(key, None)
	91	if isinstance(item, basestring):
	92	output[key] = item.splitlines()
	93	else: # text cell
	94	for key in ['source', 'rendered']:
	95	item = cell.get(key, None)
	96	if isinstance(item, basestring):
	97	cell[key] = item.splitlines()
	98	return nb
44	99
45	100	# b64 encode/decode are never actually used, because all bytes objects in
46	101	# the notebook are already b64-encoded, and we don't need/want to double-encode

IPython/nbformat/v2/tests/test_json.py

0 +13 0

              #        print
              #        print s
                      self.assertEquals(reads(s),nb0)
+                 def test_roundtrip_nosplit(self):
+                     """Ensure that multiline blobs are still readable"""
+                     # ensures that notebooks written prior to splitlines change
+                     # are still readable.
+                     s = writes(nb0, split_lines=False)
+                     self.assertEquals(reads(s),nb0)
+                 def test_roundtrip_split(self):
+                     """Ensure that splitting multiline blocks is safe"""
+                     # This won't differ from test_roundtrip unless the default changes
+                     s = writes(nb0, split_lines=True)
+                     self.assertEquals(reads(s),nb0)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages