upstream/ipython Files · IPython/nbformat/v3/rwbase.py

Backport PR : Unicode content crashes the pager (console)...

Backport PR : Unicode content crashes the pager (console) We've run into an interesting bug in the astropy project. https://github.com/astropy/astropy/issues/600 When displaying a docstring that contains Unicode and is also long enough that it gets sent to the pager it fails since the docstring can't be sent to the pager as ascii. This crashes in the middle of sending content to the pager, so the shell ends up in an inconsistent state and stops echoing the keyboard etc. The fix (attached) is merely to encode the content sent to the pager in the same encoding as the terminal (`sys.stdout.encoding`). Strictly speaking, this isn't always the right thing to do, since the pager may be configured to expect a different encoding than the terminal, but that is sort of an irrational way to configure a machine... ;) For example, `less`, in the absence of any special environment variables to tell it otherwise, uses the standard `LC*` environment variables to determine what to do, which should be the same mechanism the terminal also uses by default. If anyone can suggest a better fix, I'm all for it. Perhaps it should be configurable, defaulting to `sys.stdout.encoding`?

MinRK - - Load All Authors

File last commit:

r7335:7ec2b0aa


                r9853:7f9a133e

Download file

             rwbase.py
        
                    190 lines
            
             | 6.6 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / nbformat / v3 / rwbase.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      """Base classes and utilities for readers and writers.

      Authors:

      * Brian Granger

      """

      #-----------------------------------------------------------------------------

      #  Copyright (C) 2008-2011  The IPython Development Team

      #

      #  Distributed under the terms of the BSD License.  The full license is in

      #  the file COPYING, distributed as part of this software.

      #-----------------------------------------------------------------------------

      #-----------------------------------------------------------------------------

      # Imports

      #-----------------------------------------------------------------------------

      from base64 import encodestring, decodestring

      import pprint

      from IPython.utils import py3compat

      str_to_bytes = py3compat.str_to_bytes

      #-----------------------------------------------------------------------------

      # Code

      #-----------------------------------------------------------------------------

      def restore_bytes(nb):
          """Turn unicode image payloads of code-cell outputs back into bytes.

          Only the ``png`` and ``jpeg`` entries of each output in a code cell are
          touched; each is passed through ``str_to_bytes(value, 'ascii')``.  No
          base64 decoding happens here: bytes objects in the notebook are always
          b64-encoded, and we DO NOT encode/decode around file formats.

          The notebook is modified in place and returned.
          """
          code_cells = (
              cell
              for worksheet in nb.worksheets
              for cell in worksheet.cells
              if cell.cell_type == 'code'
          )
          for cell in code_cells:
              for out in cell.outputs:
                  for image_key in ('png', 'jpeg'):
                      if image_key in out:
                          raw = getattr(out, image_key)
                          setattr(out, image_key, str_to_bytes(raw, 'ascii'))
          return nb

      # Output keys that are likely to have multiline values.  rejoin_lines() and
      # split_lines() loop over this list to decide which output fields to convert.
      _multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']

      # FIXME: workaround for old splitlines()

      def _join_lines(lines):

          """join lines that have been written by splitlines()

          Has logic to protect against `splitlines()`, which

          should have been `splitlines(True)`

          """

          if lines and lines[0].endswith(('\n', '\r')):

              # created by splitlines(True)

              return u''.join(lines)

          else:

              # created by splitlines()

              return u'\n'.join(lines)

      def rejoin_lines(nb):
          """Collapse line lists in the notebook back into single strings.

          This reverses ``split_lines(nb)``.  Only values that are lists are
          rejoined (via ``_join_lines``); anything else passes through
          unchanged, so it is safe on notebooks that were never split.

          For code cells the ``input`` field and every output field named in
          ``_multiline_outputs`` are handled; for all other cells (text,
          heading) the ``source`` and ``rendered`` fields are.

          Used when reading JSON files that may have been passed through
          split_lines.  The notebook is modified in place and returned.
          """
          for worksheet in nb.worksheets:
              for cell in worksheet.cells:
                  if cell.cell_type != 'code':
                      # text, heading cell
                      for field in ['source', 'rendered']:
                          value = cell.get(field, None)
                          if isinstance(value, list):
                              cell[field] = _join_lines(value)
                      continue

                  if 'input' in cell and isinstance(cell.input, list):
                      cell.input = _join_lines(cell.input)
                  for out in cell.outputs:
                      for field in _multiline_outputs:
                          value = out.get(field, None)
                          if isinstance(value, list):
                              out[field] = _join_lines(value)
          return nb

      def split_lines(nb):
          """Break likely-multiline strings in the notebook into lists of lines.

          This makes the file output friendlier to line-based VCS;
          ``rejoin_lines(nb)`` reverses it.  Values are split with
          ``splitlines(True)`` so every piece keeps its line ending.

          For code cells the ``input`` field and every output field named in
          ``_multiline_outputs`` are handled; for all other cells (text,
          heading) the ``source`` and ``rendered`` fields are.  Only
          ``basestring`` values are split.

          Used when writing JSON files.  The notebook is modified in place and
          returned.
          """
          for worksheet in nb.worksheets:
              for cell in worksheet.cells:
                  if cell.cell_type != 'code':
                      # text, heading cell
                      for field in ['source', 'rendered']:
                          value = cell.get(field, None)
                          if isinstance(value, basestring):
                              cell[field] = value.splitlines(True)
                      continue

                  if 'input' in cell and isinstance(cell.input, basestring):
                      cell.input = cell.input.splitlines(True)
                  for out in cell.outputs:
                      for field in _multiline_outputs:
                          value = out.get(field, None)
                          if isinstance(value, basestring):
                              out[field] = value.splitlines(True)
          return nb

      # b64 encode/decode are never actually used, because all bytes objects in

      # the notebook are already b64-encoded, and we don't need/want to double-encode

      def base64_decode(nb):
          """Replace base64-encoded image strings in the notebook with raw bytes.

          For every output of every code cell, the ``png`` and ``jpeg`` entries
          (when present) are first encoded to ASCII if they are ``unicode`` and
          then passed through ``decodestring``.  The notebook is modified in
          place and returned.

          Note: This is never used
          """
          # NOTE(review): base64.decodestring is a legacy alias that newer
          # Python 3 releases no longer provide -- confirm target versions
          # before reviving this function.
          for worksheet in nb.worksheets:
              for cell in worksheet.cells:
                  if cell.cell_type != 'code':
                      continue
                  for out in cell.outputs:
                      for image_key in ('png', 'jpeg'):
                          if image_key not in out:
                              continue
                          if isinstance(getattr(out, image_key), unicode):
                              ascii_data = getattr(out, image_key).encode('ascii')
                              setattr(out, image_key, ascii_data)
                          decoded = decodestring(getattr(out, image_key))
                          setattr(out, image_key, decoded)
          return nb

      def base64_encode(nb):
          """Base64-encode the image bytes held in the notebook.

          For every output of every code cell, the ``png`` and ``jpeg`` entries
          (when present) are run through ``encodestring`` and the result is
          decoded as ASCII, so they end up as b64-encoded unicode strings.  The
          notebook is modified in place and returned.

          Note: This is never used
          """
          # NOTE(review): base64.encodestring is a legacy alias that newer
          # Python 3 releases no longer provide -- confirm target versions
          # before reviving this function.
          for worksheet in nb.worksheets:
              for cell in worksheet.cells:
                  if cell.cell_type != 'code':
                      continue
                  for out in cell.outputs:
                      for image_key in ('png', 'jpeg'):
                          if image_key in out:
                              encoded = encodestring(getattr(out, image_key))
                              setattr(out, image_key, encoded.decode('ascii'))
          return nb

      class NotebookReader(object):
          """A class for reading notebooks.

          Subclasses provide ``reads``; ``read`` is implemented on top of it.
          """

          def reads(self, s, **kwargs):
              """Read a notebook from a string.

              This base implementation always raises NotImplementedError;
              subclasses must override it.
              """
              # The message used to say "loads", which is not a method of this
              # class; name the method that actually has to be overridden.
              raise NotImplementedError("reads must be implemented in a subclass")

          def read(self, fp, **kwargs):
              """Read a notebook from a file like object.

              The whole content of ``fp`` is read and handed to ``reads`` along
              with any keyword arguments; the result of ``reads`` is returned.
              """
              nbs = fp.read()
              # When py3compat.PY3 is false, make sure reads() gets unicode.
              if not py3compat.PY3 and not isinstance(nbs, unicode):
                  nbs = py3compat.str_to_unicode(nbs)
              return self.reads(nbs, **kwargs)

      class NotebookWriter(object):
          """A class for writing notebooks.

          Subclasses provide ``writes``; ``write`` is implemented on top of it.
          """

          def writes(self, nb, **kwargs):
              """Write a notebook to a string.

              This base implementation always raises NotImplementedError;
              subclasses must override it.
              """
              # The message used to say "loads" (copied from elsewhere); name
              # the method that actually has to be overridden.
              raise NotImplementedError("writes must be implemented in a subclass")

          def write(self, nb, fp, **kwargs):
              """Write a notebook to a file like object.

              The notebook is serialized with ``writes`` (keyword arguments are
              forwarded) and the resulting string is written to ``fp``.  Returns
              whatever ``fp.write`` returns.
              """
              nbs = self.writes(nb, **kwargs)
              if not py3compat.PY3 and not isinstance(nbs, unicode):
                  # this branch is likely only taken for JSON on Python 2
                  nbs = py3compat.str_to_unicode(nbs)
              return fp.write(nbs)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				"""Base classes and utilities for readers and writers.

				Authors:

				* Brian Granger
				"""

				#-----------------------------------------------------------------------------
				# Copyright (C) 2008-2011 The IPython Development Team
				#
				# Distributed under the terms of the BSD License. The full license is in
				# the file COPYING, distributed as part of this software.
				#-----------------------------------------------------------------------------

				#-----------------------------------------------------------------------------
				# Imports
				#-----------------------------------------------------------------------------

				from base64 import encodestring, decodestring
				import pprint

				from IPython.utils import py3compat

				str_to_bytes = py3compat.str_to_bytes

				#-----------------------------------------------------------------------------
				# Code
				#-----------------------------------------------------------------------------

				def restore_bytes(nb):
				    """Restore bytes of image data from unicode-only formats.

				    The ``png`` and ``jpeg`` entries of every output in every code cell
				    are passed through ``str_to_bytes(value, 'ascii')``.  Base64 encoding
				    is handled elsewhere.  Bytes objects in the notebook are always
				    b64-encoded.  We DO NOT encode/decode around file formats.

				    The notebook is modified in place and returned.
				    """
				    for ws in nb.worksheets:
				        for cell in ws.cells:
				            if cell.cell_type == 'code':
				                for output in cell.outputs:
				                    if 'png' in output:
				                        output.png = str_to_bytes(output.png, 'ascii')
				                    if 'jpeg' in output:
				                        output.jpeg = str_to_bytes(output.jpeg, 'ascii')
				    return nb

				# Output keys that are likely to have multiline values.  rejoin_lines() and
				# split_lines() loop over this list to decide which output fields to convert.
				_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']


				# FIXME: workaround for old splitlines()
				def _join_lines(lines):
				"""join lines that have been written by splitlines()

				Has logic to protect against `splitlines()`, which
				should have been `splitlines(True)`
				"""
				if lines and lines[0].endswith(('\n', '\r')):
				# created by splitlines(True)
				return u''.join(lines)
				else:
				# created by splitlines()
				return u'\n'.join(lines)


				def rejoin_lines(nb):
				    """Rejoin multiline text into strings.

				    For reversing effects of ``split_lines(nb)``.

				    This only rejoins values that are lists, so if text objects were not
				    split they will pass through unchanged.  Code cells have their
				    ``input`` and the output fields named in ``_multiline_outputs``
				    rejoined; all other cells have ``source`` and ``rendered`` rejoined.

				    Used when reading JSON files that may have been passed through
				    split_lines.  The notebook is modified in place and returned.
				    """
				    for ws in nb.worksheets:
				        for cell in ws.cells:
				            if cell.cell_type == 'code':
				                if 'input' in cell and isinstance(cell.input, list):
				                    cell.input = _join_lines(cell.input)
				                for output in cell.outputs:
				                    for key in _multiline_outputs:
				                        item = output.get(key, None)
				                        if isinstance(item, list):
				                            output[key] = _join_lines(item)
				            else:  # text, heading cell
				                for key in ['source', 'rendered']:
				                    item = cell.get(key, None)
				                    if isinstance(item, list):
				                        cell[key] = _join_lines(item)
				    return nb


				def split_lines(nb):
				    """Split likely multiline text into lists of strings.

				    For file output more friendly to line-based VCS.  ``rejoin_lines(nb)``
				    will reverse the effects of ``split_lines(nb)``.

				    Only ``basestring`` values are split, using ``splitlines(True)`` so
				    each piece keeps its line ending.  Code cells have their ``input`` and
				    the output fields named in ``_multiline_outputs`` split; all other
				    cells have ``source`` and ``rendered`` split.

				    Used when writing JSON files.  The notebook is modified in place and
				    returned.
				    """
				    for ws in nb.worksheets:
				        for cell in ws.cells:
				            if cell.cell_type == 'code':
				                if 'input' in cell and isinstance(cell.input, basestring):
				                    cell.input = cell.input.splitlines(True)
				                for output in cell.outputs:
				                    for key in _multiline_outputs:
				                        item = output.get(key, None)
				                        if isinstance(item, basestring):
				                            output[key] = item.splitlines(True)
				            else:  # text, heading cell
				                for key in ['source', 'rendered']:
				                    item = cell.get(key, None)
				                    if isinstance(item, basestring):
				                        cell[key] = item.splitlines(True)
				    return nb

				# b64 encode/decode are never actually used, because all bytes objects in
				# the notebook are already b64-encoded, and we don't need/want to double-encode

				def base64_decode(nb):
				    """Restore all bytes objects in the notebook from base64-encoded strings.

				    For every output of every code cell, the ``png`` and ``jpeg`` entries
				    (when present) are encoded to ASCII if they are ``unicode`` and then
				    passed through ``decodestring``.  The notebook is modified in place
				    and returned.

				    Note: This is never used
				    """
				    for ws in nb.worksheets:
				        for cell in ws.cells:
				            if cell.cell_type == 'code':
				                for output in cell.outputs:
				                    if 'png' in output:
				                        if isinstance(output.png, unicode):
				                            output.png = output.png.encode('ascii')
				                        output.png = decodestring(output.png)
				                    if 'jpeg' in output:
				                        if isinstance(output.jpeg, unicode):
				                            output.jpeg = output.jpeg.encode('ascii')
				                        output.jpeg = decodestring(output.jpeg)
				    return nb


				def base64_encode(nb):
				    """Base64 encode all bytes objects in the notebook.

				    For every output of every code cell, the ``png`` and ``jpeg`` entries
				    (when present) are run through ``encodestring`` and decoded as ASCII.
				    These will be b64-encoded unicode strings.  The notebook is modified
				    in place and returned.

				    Note: This is never used
				    """
				    for ws in nb.worksheets:
				        for cell in ws.cells:
				            if cell.cell_type == 'code':
				                for output in cell.outputs:
				                    if 'png' in output:
				                        output.png = encodestring(output.png).decode('ascii')
				                    if 'jpeg' in output:
				                        output.jpeg = encodestring(output.jpeg).decode('ascii')
				    return nb


				class NotebookReader(object):
				    """A class for reading notebooks.

				    Subclasses provide ``reads``; ``read`` is implemented on top of it.
				    """

				    def reads(self, s, **kwargs):
				        """Read a notebook from a string.

				        This base implementation always raises NotImplementedError;
				        subclasses must override it.
				        """
				        # The message used to say "loads", which is not a method of this
				        # class; name the method that actually has to be overridden.
				        raise NotImplementedError("reads must be implemented in a subclass")

				    def read(self, fp, **kwargs):
				        """Read a notebook from a file like object.

				        The whole content of ``fp`` is read and handed to ``reads`` along
				        with any keyword arguments; the result of ``reads`` is returned.
				        """
				        nbs = fp.read()
				        # When py3compat.PY3 is false, make sure reads() gets unicode.
				        if not py3compat.PY3 and not isinstance(nbs, unicode):
				            nbs = py3compat.str_to_unicode(nbs)
				        return self.reads(nbs, **kwargs)


				class NotebookWriter(object):
				    """A class for writing notebooks.

				    Subclasses provide ``writes``; ``write`` is implemented on top of it.
				    """

				    def writes(self, nb, **kwargs):
				        """Write a notebook to a string.

				        This base implementation always raises NotImplementedError;
				        subclasses must override it.
				        """
				        # The message used to say "loads" (copied from elsewhere); name
				        # the method that actually has to be overridden.
				        raise NotImplementedError("writes must be implemented in a subclass")

				    def write(self, nb, fp, **kwargs):
				        """Write a notebook to a file like object.

				        The notebook is serialized with ``writes`` (keyword arguments are
				        forwarded) and the resulting string is written to ``fp``.  Returns
				        whatever ``fp.write`` returns.
				        """
				        nbs = self.writes(nb, **kwargs)
				        if not py3compat.PY3 and not isinstance(nbs, unicode):
				            # this branch is likely only taken for JSON on Python 2
				            nbs = py3compat.str_to_unicode(nbs)
				        return fp.write(nbs)