##// END OF EJS Templates
Merge pull request #8429 from Carreau/backport-notebook-89...
Merge pull request #8429 from Carreau/backport-notebook-89 Backport jupyter/notebook#89

File last commit:

r17148:99cdf189 merge
r21379:78b7cfd9 merge
Show More
jsonutil.py
239 lines | 7.5 KiB | text/x-python | PythonLexer
MinRK
allow unicode keys in dicts in json_clean
r17133 """Utilities to manipulate JSON objects."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
Fernando Perez
Created JSON-safety utilities....
r2947
MinRK
use math to check for nan/inf
r8022 import math
MinRK
merge IPython.parallel.streamsession into IPython.zmq.session...
r4006 import re
Fernando Perez
Created JSON-safety utilities....
r2947 import types
MinRK
merge IPython.parallel.streamsession into IPython.zmq.session...
r4006 from datetime import datetime
Mikhail Korobov
P3K: fix DeprecationWarning under Python 3.x (base64.encodestring is deprecated)
r9041 try:
# base64.encodestring is deprecated in Python 3.x
from base64 import encodebytes
except ImportError:
# Python 2.x
from base64 import encodestring as encodebytes
Thomas Kluyver
Various Python 3 fixes in IPython.utils
r4764 from IPython.utils import py3compat
Thomas Kluyver
Fix references to dict.iteritems and dict.itervalues
r13361 from IPython.utils.py3compat import string_types, unicode_type, iteritems
Brandon Parsons
saner default encoding mechanism
r6716 from IPython.utils.encoding import DEFAULT_ENCODING
Thomas Kluyver
Various Python 3 fixes in IPython.utils
# Name of the method that advances an iterator on the running interpreter:
# '__next__' on Python 3, 'next' otherwise.
next_attr_name = 'next' if not py3compat.PY3 else '__next__'
MinRK
merge IPython.parallel.streamsession into IPython.zmq.session...
r4006 #-----------------------------------------------------------------------------
# Globals and constants
#-----------------------------------------------------------------------------
# timestamp formats
MinRK
fix ISO8601 re...
# strptime format for the timestamps handled by this module
# (the fractional-seconds field is mandatory in this format).
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Groups: (date and time, optional fractional seconds, optional UTC offset).
# A literal "Z" is tolerated between the fraction and the offset.
ISO8601_PAT = re.compile(
    r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$"
)

# strptime is not threadsafe.
# Calling it once at import time seems to help.
datetime.strptime("1", "%d")
Fernando Perez
Created JSON-safety utilities....
r2947 #-----------------------------------------------------------------------------
# Classes and functions
#-----------------------------------------------------------------------------
MinRK
move rekey to jsonutil from parallel.util...
def rekey(dikt):
    """Rekey a dict that has been forced to use str keys where there should be
    ints by json.

    String keys that parse as an int (or, failing that, as a float) are
    replaced by the parsed number; every other key is left alone.

    Parameters
    ----------
    dikt : dict
        The dictionary to rekey. It is modified in place.

    Returns
    -------
    dikt : dict
        The same dictionary object, after rekeying.

    Raises
    ------
    KeyError
        If a converted key would collide with a key that is already present
        (or with another converted key). In that case `dikt` is left
        untouched.
    """
    # Plan every rename before touching the dict, so that a collision cannot
    # leave the caller's dict half-converted.
    renames = []
    claimed = set()
    for old_key in dikt:
        if not isinstance(old_key, string_types):
            continue
        try:
            new_key = int(old_key)
        except ValueError:
            try:
                new_key = float(old_key)
            except ValueError:
                # not numeric at all: keep the string key
                continue
        if new_key in dikt or new_key in claimed:
            raise KeyError("already have key %r" % new_key)
        claimed.add(new_key)
        renames.append((old_key, new_key))

    # Apply in the original iteration order so the resulting key order is the
    # same as with a one-pass pop/insert.
    for old_key, new_key in renames:
        dikt[new_key] = dikt.pop(old_key)
    return dikt
MinRK
separate single-date parsing from walking extraction...
def parse_date(s):
    """Parse an ISO8601 date string.

    Parameters
    ----------
    s : str or None
        Candidate timestamp.

    Returns
    -------
    datetime or the input
        A datetime object when `s` matches the ISO8601 pattern; otherwise
        (including when `s` is None) `s` itself, unmodified.
    """
    if s is None:
        return s
    match = ISO8601_PAT.match(s)
    if match is None:
        return s
    # FIXME: add actual timezone support
    # the UTC-offset group is matched but then simply dropped
    stamp, fraction, _offset = match.groups()
    # ISO8601 requires a fractional part, so supply one when it is absent
    return datetime.strptime(stamp + (fraction or '.0'), ISO8601)
MinRK
move rekey to jsonutil from parallel.util...
r4036
MinRK
merge IPython.parallel.streamsession into IPython.zmq.session...
def extract_dates(obj):
    """Extract ISO8601 dates from unpacked JSON.

    Walks dicts, lists and tuples recursively and passes every string through
    `parse_date`. Containers are rebuilt rather than modified: dicts come
    back as new dicts, lists and tuples as new lists. Anything else is
    returned as-is.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's dict is not clobbered
        return {k: extract_dates(v) for k, v in iteritems(obj)}
    if isinstance(obj, (list, tuple)):
        return [extract_dates(item) for item in obj]
    if isinstance(obj, string_types):
        return parse_date(obj)
    return obj
MinRK
handle datetime objects in Session...
def squash_dates(obj):
    """Squash datetime objects into ISO8601 strings.

    Walks dicts, lists and tuples recursively and replaces every datetime
    with its `isoformat()` string. Containers are rebuilt rather than
    modified: dicts come back as new dicts, lists and tuples as new lists.
    Anything else is returned as-is.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's dict is not clobbered
        return {k: squash_dates(v) for k, v in iteritems(obj)}
    if isinstance(obj, (list, tuple)):
        return [squash_dates(item) for item in obj]
    if isinstance(obj, datetime):
        return obj.isoformat()
    return obj
Mikhail Korobov
P3K: fix DeprecationWarning under Python 3.x (base64.encodestring is deprecated)
r9041
MinRK
merge IPython.parallel.streamsession into IPython.zmq.session...
def date_default(obj):
    """default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : object
        The object the JSON encoder could not serialize on its own.

    Returns
    -------
    str
        `obj.isoformat()` when `obj` is a datetime.

    Raises
    ------
    TypeError
        For any object that is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Wrap obj in a 1-tuple: a bare tuple on the right-hand side of % is
    # unpacked as the argument list, which garbles the message for tuples.
    raise TypeError("%r is not JSON serializable" % (obj,))
MinRK
move _encode_binary to jsonutil.encode_images...
# constants for identifying png/jpeg data
# leading bytes of raw PNG data
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
# leading bytes of raw JPEG data
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
# front of PDF base64-encoded
PDF64 = b'JVBER'


def encode_images(format_dict):
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------
    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------
    format_dict : dict
        A copy of the same dictionary,
        but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
        is base64-encoded.
    """
    encoded = format_dict.copy()

    # (mime-type, prefix the payload carries when it is *already* base64)
    binary_formats = (
        ('image/png', PNG64),
        ('image/jpeg', JPEG64),
        ('application/pdf', PDF64),
    )
    for mimetype, b64_prefix in binary_formats:
        data = format_dict.get(mimetype)
        if not isinstance(data, bytes):
            # missing, or already text: leave the copied value untouched
            continue
        # make sure we don't double-encode
        if not data.startswith(b64_prefix):
            data = encodebytes(data)
        encoded[mimetype] = data.decode('ascii')

    return encoded
MinRK
merge IPython.parallel.streamsession into IPython.zmq.session...
r4006
Fernando Perez
Created JSON-safety utilities....
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified. Sets and tuples are
    converted to lists, lists are copied and dicts are also copied.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object
        A version of the input which will not cause an encoding error when
        encoded as JSON. Note that this function does not *encode* its inputs,
        it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
        If `obj` is a dict in which two keys have the same text form.
    """
    # types that are 'atomic' and ok in json as-is.
    passthrough_types = (unicode_type, type(None))

    # containers that we need to convert into lists
    listable_types = (tuple, set, types.GeneratorType)

    if isinstance(obj, float):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        # re-cast so float subclasses become plain floats
        return float(obj)

    if isinstance(obj, int):
        # bools are ints, but we don't want to cast them to 0,1
        if isinstance(obj, bool):
            return obj
        # cast int to int, in case subclasses override __str__
        # (e.g. boost enum, #4598)
        return int(obj)

    if isinstance(obj, passthrough_types):
        return obj

    if isinstance(obj, bytes):
        # bytes are not json-safe: decode, replacing undecodable sequences
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # known containers, plus anything that quacks like an iterator
    needs_listing = isinstance(obj, listable_types) or (
        hasattr(obj, '__iter__') and hasattr(obj, next_attr_name))
    if needs_listing:
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(item) for item in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification. This can happen with keys
        # like 1 and '1', which collide in JSON.
        distinct_text_keys = set(map(unicode_type, obj))
        if len(obj) != len(distinct_text_keys):
            raise ValueError('dict cannot be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        cleaned = {}
        for key, value in iteritems(obj):
            cleaned[unicode_type(key)] = json_clean(value)
        return cleaned

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that. This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)