upstream/ipython Commit - r5278:5d9a42c2

split likely multiline strings when writing to/from JSON

MinRK -

r5278:5d9a42c2

parent child

IPython/frontend/html/notebook/notebookmanager.py

0 +6 -1

              """A notebook manager that uses the local file system for storage.
              Authors:
              * Brian Granger
              """
              #-----------------------------------------------------------------------------
              #  Copyright (C) 2008-2011  The IPython Development Team
              #
              #  Distributed under the terms of the BSD License.  The full license is in
              #  the file COPYING, distributed as part of this software.
              #-----------------------------------------------------------------------------
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
              import datetime
              import os
              import uuid
              import glob
              from tornado import web
              from IPython.config.configurable import LoggingConfigurable
              from IPython.nbformat import current
              from IPython.utils.traitlets import Unicode, List, Dict
              #-----------------------------------------------------------------------------
              # Code
              #-----------------------------------------------------------------------------
              class NotebookManager(LoggingConfigurable):
                  notebook_dir = Unicode(os.getcwd(), config=True, help="""
                      The directory to use for notebooks.
                  """)
                  filename_ext = Unicode(u'.ipynb')
                  allowed_formats = List([u'json',u'py'])
                  # Map notebook_ids to notebook names
                  mapping = Dict()
                  # Map notebook names to notebook_ids
                  rev_mapping = Dict()
                  def list_notebooks(self):
                      """Return every notebook found in the notebook dir, sorted by name.
                      The result is a list of dicts of the form::
                          dict(notebook_id=notebook,name=name)
                      Names without a known notebook_id get one via ``new_notebook_id``.
                      """
                      pattern = os.path.join(self.notebook_dir, '*' + self.filename_ext)
                      entries = []
                      for nb_path in glob.glob(pattern):
                          # strip the directory and the extension to get the bare name
                          name = os.path.splitext(os.path.basename(nb_path))[0]
                          if name in self.rev_mapping:
                              notebook_id = self.rev_mapping[name]
                          else:
                              notebook_id = self.new_notebook_id(name)
                          entries.append(dict(notebook_id=notebook_id, name=name))
                      entries.sort(key=lambda entry: entry['name'])
                      return entries
                  def new_notebook_id(self, name):
                      """Create a random notebook_id for ``name`` and record it in both mappings."""
                      # TODO: the uuid5-based id below would give stable urls for notebooks,
                      # but unless the notebooks are immediately redirected to their new urls
                      # when their filename changes, nasty inconsistencies result.  So for now
                      # it is disabled and a random uuid4() is used instead.  The logic is
                      # kept here so that it can be reactivated later, when the necessary
                      # url redirection code is written.
                      #notebook_id = unicode(uuid.uuid5(uuid.NAMESPACE_URL,
                      #                 'file://'+self.get_path_by_name(name).encode('utf-8')))
                      fresh_id = unicode(uuid.uuid4())
                      self.mapping[fresh_id] = name
                      self.rev_mapping[name] = fresh_id
                      return fresh_id
                  def delete_notebook_id(self, notebook_id):
                      """Forget a notebook_id in both mappings; the notebook file is left untouched."""
                      # pop() raises KeyError for an unknown id, like a plain lookup would
                      name = self.mapping.pop(notebook_id)
                      del self.rev_mapping[name]
                  def notebook_exists(self, notebook_id):
                      """Return True when the id is known and its notebook file is on disk."""
                      if notebook_id in self.mapping:
                          name = self.mapping[notebook_id]
                          return os.path.isfile(self.get_path_by_name(name))
                      return False
                  def find_path(self, notebook_id):
                      """Map a notebook_id to the full path of its file.
                      Raises ``web.HTTPError`` 404 when the id is not in ``mapping``.
                      """
                      if notebook_id not in self.mapping:
                          raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
                      return self.get_path_by_name(self.mapping[notebook_id])
                  def get_path_by_name(self, name):
                      """Build the full path of the notebook called ``name`` inside ``notebook_dir``."""
                      return os.path.join(self.notebook_dir, name + self.filename_ext)
                  def get_notebook(self, notebook_id, format=u'json'):
                      """Get the representation of a notebook in format by notebook_id.
                      Returns a ``(last_modified, name, data)`` tuple, where ``data`` is the
                      string produced by ``current.writes`` for the requested format.
                      Raises ``web.HTTPError`` 415 when ``format`` is not in
                      ``allowed_formats``; errors from ``get_notebook_object`` propagate.
                      """
                      format = unicode(format)
                      if format not in self.allowed_formats:
                          raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
                      last_modified, nb = self.get_notebook_object(notebook_id)
-                     data = current.writes(nb, format)
+                     kwargs = {}
+                     if format == 'json':
+                         # don't split lines for sending over the wire, because it
+                         # should match the Python in-memory format.
+                         kwargs['split_lines'] = False
+                     data = current.writes(nb, format, **kwargs)
                      # NOTE(review): the save_* methods read the name from
                      # nb.metadata.name, while this reads a top-level 'name' key --
                      # confirm which of the two is authoritative.
                      name = nb.get('name','notebook')
                      return last_modified, name, data
                  def get_notebook_object(self, notebook_id):
                      """Get the NotebookNode representation of a notebook by notebook_id.
                      Returns a ``(last_modified, nb)`` tuple; ``last_modified`` is the file's
                      mtime as a UTC ``datetime``.
                      Raises ``web.HTTPError`` 404 when the id is unknown or the file is
                      missing, and 500 when the file content cannot be parsed.
                      """
                      path = self.find_path(notebook_id)
                      if not os.path.isfile(path):
                          raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
                      info = os.stat(path)
                      last_modified = datetime.datetime.utcfromtimestamp(info.st_mtime)
                      with open(path,'r') as f:
                          s = f.read()
                      try:
                          # v1 and v2 notebooks are both stored as JSON in the .ipynb files.
                          nb = current.reads(s, u'json')
                      except Exception:
                          # Exception rather than a bare except, so that
                          # KeyboardInterrupt/SystemExit are not turned into a 500.
                          raise web.HTTPError(500, u'Unreadable JSON notebook.')
                      if 'name' not in nb:
                          # Strip only the extension, like list_notebooks does, so that a
                          # name containing dots is not truncated at the first dot.
                          nb.name = os.path.splitext(os.path.basename(path))[0]
                      return last_modified, nb
                  def save_new_notebook(self, data, name=None, format=u'json'):
                      """Save a new notebook and return its notebook_id.
                      If a name is passed in, it overrides any values in the notebook data
                      and the value in the data is updated to use that value.
                      Raises ``web.HTTPError`` 415 for a format outside ``allowed_formats``
                      and 400 when the data cannot be parsed or no name is available.
                      """
                      if format not in self.allowed_formats:
                          raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
                      try:
                          nb = current.reads(data.decode('utf-8'), format)
                      except Exception:
                          # Exception rather than a bare except, so that
                          # KeyboardInterrupt/SystemExit are not reported as bad input.
                          raise web.HTTPError(400, u'Invalid JSON data')
                      if name is None:
                          try:
                              name = nb.metadata.name
                          except AttributeError:
                              raise web.HTTPError(400, u'Missing notebook name')
                      nb.metadata.name = name
                      notebook_id = self.new_notebook_id(name)
                      self.save_notebook_object(notebook_id, nb)
                      return notebook_id
                  def save_notebook(self, notebook_id, data, name=None, format=u'json'):
                      """Save an existing notebook by notebook_id.
                      ``data`` is decoded as UTF-8 and parsed in ``format``; a non-None
                      ``name`` replaces the name stored in the notebook metadata.
                      Raises ``web.HTTPError`` 415 for a format outside ``allowed_formats``
                      and 400 when the data cannot be parsed.
                      """
                      if format not in self.allowed_formats:
                          raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
                      try:
                          nb = current.reads(data.decode('utf-8'), format)
                      except Exception:
                          # Exception rather than a bare except, so that
                          # KeyboardInterrupt/SystemExit are not reported as bad input.
                          raise web.HTTPError(400, u'Invalid JSON data')
                      if name is not None:
                          nb.metadata.name = name
                      self.save_notebook_object(notebook_id, nb)
                  def save_notebook_object(self, notebook_id, nb):
                      """Save an existing notebook object by notebook_id.
                      The notebook is written as JSON under the name found in
                      ``nb.metadata.name``.  When that differs from the name currently mapped
                      to ``notebook_id``, the old file is removed and both mappings are updated.
                      Raises ``web.HTTPError`` 404 for an unknown id and 400 when the name is
                      missing or the file cannot be written.
                      """
                      if notebook_id not in self.mapping:
                          raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
                      old_name = self.mapping[notebook_id]
                      try:
                          new_name = nb.metadata.name
                      except AttributeError:
                          raise web.HTTPError(400, u'Missing notebook name')
                      path = self.get_path_by_name(new_name)
                      try:
                          with open(path,'w') as f:
                              current.write(nb, f, u'json')
                      except Exception:
                          # Exception rather than a bare except, so that
                          # KeyboardInterrupt/SystemExit still propagate.
                          raise web.HTTPError(400, u'Unexpected error while saving notebook')
                      if old_name != new_name:
                          old_path = self.get_path_by_name(old_name)
                          if os.path.isfile(old_path):
                              os.unlink(old_path)
                          self.mapping[notebook_id] = new_name
                          self.rev_mapping[new_name] = notebook_id
                          # Drop the stale reverse entry; otherwise a later notebook that
                          # takes the old name would be handed this notebook's id.
                          if self.rev_mapping.get(old_name) == notebook_id:
                              del self.rev_mapping[old_name]
                  def delete_notebook(self, notebook_id):
                      """Remove the notebook file for ``notebook_id`` and forget the id.
                      Raises ``web.HTTPError`` 404 when the id is unknown or the file is missing.
                      """
                      target = self.find_path(notebook_id)
                      if os.path.isfile(target):
                          os.unlink(target)
                          self.delete_notebook_id(notebook_id)
                          return
                      raise web.HTTPError(404, u'Notebook does not exist: %s' % notebook_id)
                  def new_notebook(self):
                      """Create an empty notebook under the first free ``Untitled<i>`` name.
                      Returns the notebook_id registered for the new notebook.
                      """
                      counter = 0
                      name = u'Untitled%i' % counter
                      path = self.get_path_by_name(name)
                      # walk Untitled0, Untitled1, ... until a name has no file yet
                      while os.path.isfile(path):
                          counter += 1
                          name = u'Untitled%i' % counter
                          path = self.get_path_by_name(name)
                      notebook_id = self.new_notebook_id(name)
                      nb = current.new_notebook(metadata=current.new_metadata(name=name))
                      with open(path,'w') as f:
                          current.write(nb, f, u'json')
                      return notebook_id

IPython/nbformat/v2/nbjson.py

0 +10 -4

              """Read and write notebooks in JSON format.
              Authors:
              * Brian Granger
              """
              #-----------------------------------------------------------------------------
              #  Copyright (C) 2008-2011  The IPython Development Team
              #
              #  Distributed under the terms of the BSD License.  The full license is in
              #  the file COPYING, distributed as part of this software.
              #-----------------------------------------------------------------------------
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
-             from .nbbase import from_dict
-             from .rwbase import NotebookReader, NotebookWriter, restore_bytes
+             import copy
              import json
+             from .nbbase import from_dict
+             from .rwbase import (
+                 NotebookReader, NotebookWriter, restore_bytes, rejoin_lines, split_lines
+             )
              #-----------------------------------------------------------------------------
              # Code
              #-----------------------------------------------------------------------------
              class BytesEncoder(json.JSONEncoder):
                  """JSON encoder that emits b64 (and other *ascii*) bytestrings as text."""
                  def default(self, obj):
                      """Decode ``bytes`` as ASCII; hand every other type to the stock encoder."""
                      if not isinstance(obj, bytes):
                          return json.JSONEncoder.default(self, obj)
                      return obj.decode('ascii')
              class JSONReader(NotebookReader):
                  """Reader that builds a notebook from JSON text."""
                  def reads(self, s, **kwargs):
                      """Parse the JSON string ``s`` and convert the result with ``to_notebook``.
                      ``kwargs`` are forwarded to both ``json.loads`` and ``to_notebook``.
                      """
                      nb = json.loads(s, **kwargs)
                      nb = self.to_notebook(nb, **kwargs)
                      return nb
                  def to_notebook(self, d, **kwargs):
                      """Convert the decoded JSON structure ``d`` into a notebook.
                      ``kwargs`` are accepted but not used here.
                      """
-                     return restore_bytes(from_dict(d))
+                     return restore_bytes(rejoin_lines(from_dict(d)))
              class JSONWriter(NotebookWriter):
                  """Writer that serialises a notebook to indented, key-sorted JSON."""
                  def writes(self, nb, **kwargs):
                      """Return ``nb`` as a JSON string.
                      ``split_lines`` (default True) is consumed here: when true, a deep copy
                      of ``nb`` is passed through ``split_lines`` first, so the caller's
                      notebook is not modified.  Remaining ``kwargs`` go to ``json.dumps``,
                      with ``cls``, ``indent`` and ``sort_keys`` always overridden.
                      """
                      should_split = kwargs.pop('split_lines', True)
                      kwargs.update(cls=BytesEncoder, indent=4, sort_keys=True)
                      if should_split:
                          nb = split_lines(copy.deepcopy(nb))
                      return json.dumps(nb, **kwargs)
              # Module-level reader/writer instances; their bound methods are exposed
              # below as the module's function-style API.
              _reader = JSONReader()
              _writer = JSONWriter()
              reads = _reader.reads
              read = _reader.read
              to_notebook = _reader.to_notebook
              write = _writer.write
              writes = _writer.writes

IPython/nbformat/v2/rwbase.py

0 +55 0

              """Base classes and utilities for readers and writers.
              Authors:
              * Brian Granger
              """
              #-----------------------------------------------------------------------------
              #  Copyright (C) 2008-2011  The IPython Development Team
              #
              #  Distributed under the terms of the BSD License.  The full license is in
              #  the file COPYING, distributed as part of this software.
              #-----------------------------------------------------------------------------
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
              from base64 import encodestring, decodestring
              import pprint
              from IPython.utils.py3compat import str_to_bytes
              #-----------------------------------------------------------------------------
              # Code
              #-----------------------------------------------------------------------------
              def restore_bytes(nb):
                  """Turn the png/jpeg payloads of code-cell outputs back into bytes.
                  Meant for notebooks read from unicode-only formats.  Base64 encoding is
                  handled elsewhere: bytes objects in the notebook are always b64-encoded,
                  and nothing is encoded or decoded around file formats here.
                  The notebook is modified in place and returned.
                  """
                  for ws in nb.worksheets:
                      for cell in ws.cells:
                          if cell.cell_type != 'code':
                              continue
                          for output in cell.outputs:
                              for image_key in ('png', 'jpeg'):
                                  if image_key in output:
                                      as_bytes = str_to_bytes(getattr(output, image_key), 'ascii')
                                      setattr(output, image_key, as_bytes)
                  return nb
+             # output keys that are likely to have multiline values
+             _multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
+             def rejoin_lines(nb):
+                 """rejoin multiline text into strings
+                 For reversing effects of ``split_lines(nb)``.
+                 This only rejoins lines that have been split, so if text objects were not split
+                 they will pass through unchanged.
+                 Used when reading JSON files that may have been passed through split_lines.
+                 """
+                 for ws in nb.worksheets:
+                     for cell in ws.cells:
+                         if cell.cell_type == 'code':
+                             if 'input' in cell and isinstance(cell.input, list):
+                                 cell.input = u'\n'.join(cell.input)
+                             for output in cell.outputs:
+                                 for key in _multiline_outputs:
+                                     item = output.get(key, None)
+                                     if isinstance(item, list):
+                                         output[key] = u'\n'.join(item)
+                         else: # text cell
+                             for key in ['source', 'rendered']:
+                                 item = cell.get(key, None)
+                                 if isinstance(item, list):
+                                     cell[key] = u'\n'.join(item)
+                 return nb
+             def split_lines(nb):
+                 """split likely multiline text into lists of strings
+                 For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will
+                 reverse the effects of ``split_lines(nb)``.
+                 Used when writing JSON files.
+                 """
+                 for ws in nb.worksheets:
+                     for cell in ws.cells:
+                         if cell.cell_type == 'code':
+                             if 'input' in cell and isinstance(cell.input, basestring):
+                                 cell.input = cell.input.splitlines()
+                             for output in cell.outputs:
+                                 for key in _multiline_outputs:
+                                     item = output.get(key, None)
+                                     if isinstance(item, basestring):
+                                         output[key] = item.splitlines()
+                         else: # text cell
+                             for key in ['source', 'rendered']:
+                                 item = cell.get(key, None)
+                                 if isinstance(item, basestring):
+                                     cell[key] = item.splitlines()
+                 return nb
              # b64 encode/decode are never actually used, because all bytes objects in
              # the notebook are already b64-encoded, and we don't need/want to double-encode
              def base64_decode(nb):
                  """Decode the base64 png/jpeg payloads of code-cell outputs into raw bytes.
                  Unicode payloads are first encoded to ASCII.  The notebook is modified in
                  place and returned.
                  Note: This is never used
                  """
                  for ws in nb.worksheets:
                      for cell in ws.cells:
                          if cell.cell_type != 'code':
                              continue
                          for output in cell.outputs:
                              for image_key in ('png', 'jpeg'):
                                  if image_key not in output:
                                      continue
                                  if isinstance(getattr(output, image_key), unicode):
                                      setattr(output, image_key,
                                              getattr(output, image_key).encode('ascii'))
                                  setattr(output, image_key,
                                          decodestring(getattr(output, image_key)))
                  return nb
              def base64_encode(nb):
                  """Base64-encode the png/jpeg payloads of code-cell outputs.
                  The encoded payloads are stored as ASCII-decoded strings.  The notebook is
                  modified in place and returned.
                  Note: This is never used
                  """
                  for ws in nb.worksheets:
                      for cell in ws.cells:
                          if cell.cell_type != 'code':
                              continue
                          for output in cell.outputs:
                              for image_key in ('png', 'jpeg'):
                                  if image_key in output:
                                      encoded = encodestring(getattr(output, image_key))
                                      setattr(output, image_key, encoded.decode('ascii'))
                  return nb
              class NotebookReader(object):
                  """A class for reading notebooks.
                  Subclasses implement ``reads``; ``read`` is built on top of it.
                  """
                  def reads(self, s, **kwargs):
                      """Read a notebook from a string."""
                      raise NotImplementedError("reads must be implemented in a subclass")
                  def read(self, fp, **kwargs):
                      """Read a notebook from a file like object"""
                      # delegate to reads(); calling read() again here would hand a
                      # string to itself and fail on the missing .read attribute
                      return self.reads(fp.read(), **kwargs)
              class NotebookWriter(object):
                  """A class for writing notebooks.
                  Subclasses implement ``writes``; ``write`` is built on top of it.
                  """
                  def writes(self, nb, **kwargs):
                      """Write a notebook to a string."""
                      raise NotImplementedError("writes must be implemented in a subclass")
                  def write(self, nb, fp, **kwargs):
                      """Write a notebook to a file like object"""
                      return fp.write(self.writes(nb,**kwargs))

IPython/nbformat/v2/tests/test_json.py

0 +13 0

              import pprint
              from unittest import TestCase
              from ..nbjson import reads, writes
              from .nbexamples import nb0
              class TestJSON(TestCase):
                  """Round-trip tests for the JSON notebook reader and writer."""
                  def test_roundtrip(self):
                      """Writing with default options and reading back gives an equal notebook"""
                      s = writes(nb0)
                      self.assertEqual(reads(s),nb0)
                  def test_roundtrip_nosplit(self):
                      """Ensure that multiline blobs are still readable"""
                      # ensures that notebooks written prior to splitlines change
                      # are still readable.
                      s = writes(nb0, split_lines=False)
                      self.assertEqual(reads(s),nb0)
                  def test_roundtrip_split(self):
                      """Ensure that splitting multiline blocks is safe"""
                      # This won't differ from test_roundtrip unless the default changes
                      s = writes(nb0, split_lines=True)
                      self.assertEqual(reads(s),nb0)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages