@@ -1,243 +1,239 @@
 """Utilities to manipulate JSON objects."""
 
 # Copyright (c) IPython Development Team.
 # Distributed under the terms of the Modified BSD License.
 
 import math
 import re
 import types
 from datetime import datetime
 
 try:
     # base64.encodestring is deprecated in Python 3.x
     from base64 import encodebytes
 except ImportError:
     # Python 2.x
     from base64 import encodestring as encodebytes
 
 from IPython.utils import py3compat
 from IPython.utils.py3compat import string_types, unicode_type, iteritems
 from IPython.utils.encoding import DEFAULT_ENCODING
 next_attr_name = '__next__' if py3compat.PY3 else 'next'
 
 #-----------------------------------------------------------------------------
 # Globals and constants
 #-----------------------------------------------------------------------------
 
 # timestamp formats
 ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
 ISO8601_PAT=re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$")
 
 # holy crap, strptime is not threadsafe.
 # Calling it once at import seems to help.
 datetime.strptime("1", "%d")
 
 #-----------------------------------------------------------------------------
 # Classes and functions
 #-----------------------------------------------------------------------------
 
 def rekey(dikt):
     """Rekey a dict that has been forced to use str keys where there should be
     ints by json."""
-    for k in dikt:
+    for k in list(dikt):
         if isinstance(k, string_types):
-            ik = fk = None
+            nk = None
             try:
-                ik = int(k)
+                nk = int(k)
             except ValueError:
                 try:
-                    fk = float(k)
+                    nk = float(k)
                 except ValueError:
                     continue
-            if ik is not None:
-                nk = ik
-            else:
-                nk = fk
             if nk in dikt:
-                raise KeyError("already have key %r"%nk)
+                raise KeyError("already have key %r" % nk)
             dikt[nk] = dikt.pop(k)
     return dikt
 
 def parse_date(s):
     """parse an ISO8601 date string
 
     If it is None or not a valid ISO8601 timestamp,
     it will be returned unmodified.
     Otherwise, it will return a datetime object.
     """
     if s is None:
         return s
     m = ISO8601_PAT.match(s)
     if m:
         # FIXME: add actual timezone support
         # this just drops the timezone info
         notz, ms, tz = m.groups()
         if not ms:
             ms = '.0'
         notz = notz + ms
         return datetime.strptime(notz, ISO8601)
     return s
 
 def extract_dates(obj):
     """extract ISO8601 dates from unpacked JSON"""
     if isinstance(obj, dict):
         new_obj = {} # don't clobber
         for k,v in iteritems(obj):
             new_obj[k] = extract_dates(v)
         obj = new_obj
     elif isinstance(obj, (list, tuple)):
         obj = [ extract_dates(o) for o in obj ]
     elif isinstance(obj, string_types):
         obj = parse_date(obj)
     return obj
 
 def squash_dates(obj):
     """squash datetime objects into ISO8601 strings"""
     if isinstance(obj, dict):
         obj = dict(obj) # don't clobber
         for k,v in iteritems(obj):
             obj[k] = squash_dates(v)
     elif isinstance(obj, (list, tuple)):
         obj = [ squash_dates(o) for o in obj ]
     elif isinstance(obj, datetime):
         obj = obj.isoformat()
     return obj
 
 def date_default(obj):
     """default function for packing datetime objects in JSON."""
     if isinstance(obj, datetime):
         return obj.isoformat()
     else:
         raise TypeError("%r is not JSON serializable"%obj)
 
 
 # constants for identifying png/jpeg data
 PNG = b'\x89PNG\r\n\x1a\n'
 # front of PNG base64-encoded
 PNG64 = b'iVBORw0KG'
 JPEG = b'\xff\xd8'
 # front of JPEG base64-encoded
 JPEG64 = b'/9'
 # front of PDF base64-encoded
 PDF64 = b'JVBER'
 
 def encode_images(format_dict):
     """b64-encodes images in a displaypub format dict
 
     Perhaps this should be handled in json_clean itself?
 
     Parameters
     ----------
 
     format_dict : dict
         A dictionary of display data keyed by mime-type
 
     Returns
     -------
 
     format_dict : dict
         A copy of the same dictionary,
         but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
         is base64-encoded.
 
     """
     encoded = format_dict.copy()
 
     pngdata = format_dict.get('image/png')
     if isinstance(pngdata, bytes):
         # make sure we don't double-encode
         if not pngdata.startswith(PNG64):
             pngdata = encodebytes(pngdata)
         encoded['image/png'] = pngdata.decode('ascii')
 
     jpegdata = format_dict.get('image/jpeg')
     if isinstance(jpegdata, bytes):
         # make sure we don't double-encode
         if not jpegdata.startswith(JPEG64):
             jpegdata = encodebytes(jpegdata)
         encoded['image/jpeg'] = jpegdata.decode('ascii')
 
     pdfdata = format_dict.get('application/pdf')
     if isinstance(pdfdata, bytes):
         # make sure we don't double-encode
         if not pdfdata.startswith(PDF64):
             pdfdata = encodebytes(pdfdata)
         encoded['application/pdf'] = pdfdata.decode('ascii')
 
     return encoded
 
 
 def json_clean(obj):
     """Clean an object to ensure it's safe to encode in JSON.
 
     Atomic, immutable objects are returned unmodified. Sets and tuples are
     converted to lists, lists are copied and dicts are also copied.
 
     Note: dicts whose keys could cause collisions upon encoding (such as a dict
     with both the number 1 and the string '1' as keys) will cause a ValueError
     to be raised.
 
     Parameters
     ----------
     obj : any python object
 
     Returns
     -------
     out : object
 
       A version of the input which will not cause an encoding error when
       encoded as JSON. Note that this function does not *encode* its inputs,
       it simply sanitizes it so that there will be no encoding errors later.
 
     """
     # types that are 'atomic' and ok in json as-is.
     atomic_ok = (unicode_type, type(None))
 
     # containers that we need to convert into lists
     container_to_list = (tuple, set, types.GeneratorType)
 
     if isinstance(obj, float):
         # cast out-of-range floats to their reprs
         if math.isnan(obj) or math.isinf(obj):
             return repr(obj)
         return float(obj)
 
     if isinstance(obj, int):
         # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
         if isinstance(obj, bool):
             # bools are ints, but we don't want to cast them to 0,1
             return obj
         return int(obj)
 
     if isinstance(obj, atomic_ok):
         return obj
 
     if isinstance(obj, bytes):
         return obj.decode(DEFAULT_ENCODING, 'replace')
 
     if isinstance(obj, container_to_list) or (
         hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
         obj = list(obj)
 
     if isinstance(obj, list):
         return [json_clean(x) for x in obj]
 
     if isinstance(obj, dict):
         # First, validate that the dict won't lose data in conversion due to
         # key collisions after stringification. This can happen with keys like
         # True and 'true' or 1 and '1', which collide in JSON.
         nkeys = len(obj)
         nkeys_collapsed = len(set(map(unicode_type, obj)))
         if nkeys != nkeys_collapsed:
             raise ValueError('dict cannot be safely converted to JSON: '
                              'key collision would lead to dropped values')
         # If all OK, proceed by making the new dict that will be json-safe
         out = {}
         for k,v in iteritems(obj):
             out[unicode_type(k)] = json_clean(v)
         return out
 
     # If we get here, we don't know how to handle the object, so we just get
     # its repr and return that. This will catch lambdas, open sockets, class
     # objects, and any other complicated contraption that json can't encode
     return repr(obj)
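
The only functional change in the hunk above (IPython's `jsonutil` module, per its imports) is in `rekey`: the loop now iterates over `list(dikt)`, a snapshot of the keys, instead of the live dict, and the separate `ik`/`fk` temporaries collapse into a single `nk`. The second hunk below adds a matching regression test, `test_rekey`. As a minimal illustration of why the snapshot matters, here is a sketch with made-up sample data (it is not part of the diff): mutating a dict while iterating over it directly is unsafe on Python 3, while iterating over a copied key list lets the pop-and-reinsert pattern run cleanly.

# Illustrative sketch, not from the repository: the hazard the patch avoids.
d = {'1': 'a', '2': 'b'}
try:
    for k in d:
        del d[k]              # size changes under the iterator
except RuntimeError as err:
    print(err)                # dictionary changed size during iteration

# Iterating over a snapshot of the keys, as rekey() now does, is safe even
# though keys are popped and reinserted inside the loop:
d = {'1': 'a', '2.5': 'b', 'x': 'c'}     # made-up sample data
for k in list(d):                        # snapshot of the keys
    try:
        nk = int(k)
    except ValueError:
        try:
            nk = float(k)
        except ValueError:
            continue                     # leave non-numeric keys alone
    d[nk] = d.pop(k)
print(d == {1: 'a', 2.5: 'b', 'x': 'c'}) # True
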
@@ -1,144 +1,151 @@
 # coding: utf-8
 """Test suite for our JSON utilities."""
 
 # Copyright (c) IPython Development Team.
 # Distributed under the terms of the Modified BSD License.
 
 import datetime
 import json
 from base64 import decodestring
 
 import nose.tools as nt
 
 from IPython.utils import jsonutil, tz
 from ..jsonutil import json_clean, encode_images
 from ..py3compat import unicode_to_str, str_to_bytes, iteritems
 
 
 class Int(int):
     def __str__(self):
         return 'Int(%i)' % self
 
 def test():
     # list of input/expected output. Use None for the expected output if it
     # can be the same as the input.
     pairs = [(1, None), # start with scalars
              (1.0, None),
              ('a', None),
              (True, None),
              (False, None),
              (None, None),
              # complex numbers for now just go to strings, as otherwise they
              # are unserializable
              (1j, '1j'),
              # Containers
              ([1, 2], None),
              ((1, 2), [1, 2]),
              (set([1, 2]), [1, 2]),
              (dict(x=1), None),
              ({'x': 1, 'y':[1,2,3], '1':'int'}, None),
              # More exotic objects
              ((x for x in range(3)), [0, 1, 2]),
              (iter([1, 2]), [1, 2]),
              (Int(5), 5),
              ]
 
     for val, jval in pairs:
         if jval is None:
             jval = val
         out = json_clean(val)
         # validate our cleanup
         nt.assert_equal(out, jval)
         # and ensure that what we return, indeed encodes cleanly
         json.loads(json.dumps(out))
 
 
+def test_rekey():
+    # This could fail due to modifying the dict keys in-place on Python 3
+    d = { i:i for i in map(str, range(128)) }
+    d = jsonutil.rekey(d)
+    for key in d:
+        nt.assert_is_instance(key, int)
+
 
 def test_encode_images():
     # invalid data, but the header and footer are from real files
     pngdata = b'\x89PNG\r\n\x1a\nblahblahnotactuallyvalidIEND\xaeB`\x82'
     jpegdata = b'\xff\xd8\xff\xe0\x00\x10JFIFblahblahjpeg(\xa0\x0f\xff\xd9'
     pdfdata = b'%PDF-1.\ntrailer<</Root<</Pages<</Kids[<</MediaBox[0 0 3 3]>>]>>>>>>'
 
     fmt = {
         'image/png' : pngdata,
         'image/jpeg' : jpegdata,
         'application/pdf' : pdfdata
     }
     encoded = encode_images(fmt)
     for key, value in iteritems(fmt):
         # encoded has unicode, want bytes
         decoded = decodestring(encoded[key].encode('ascii'))
         nt.assert_equal(decoded, value)
     encoded2 = encode_images(encoded)
     nt.assert_equal(encoded, encoded2)
 
     b64_str = {}
     for key, encoded in iteritems(encoded):
         b64_str[key] = unicode_to_str(encoded)
     encoded3 = encode_images(b64_str)
     nt.assert_equal(encoded3, b64_str)
     for key, value in iteritems(fmt):
         # encoded3 has str, want bytes
         decoded = decodestring(str_to_bytes(encoded3[key]))
         nt.assert_equal(decoded, value)
 
 def test_lambda():
     jc = json_clean(lambda : 1)
     nt.assert_is_instance(jc, str)
     nt.assert_in('<lambda>', jc)
     json.dumps(jc)
 
 def test_extract_dates():
     timestamps = [
         '2013-07-03T16:34:52.249482',
         '2013-07-03T16:34:52.249482Z',
         '2013-07-03T16:34:52.249482Z-0800',
         '2013-07-03T16:34:52.249482Z+0800',
         '2013-07-03T16:34:52.249482Z+08:00',
         '2013-07-03T16:34:52.249482Z-08:00',
         '2013-07-03T16:34:52.249482-0800',
         '2013-07-03T16:34:52.249482+0800',
         '2013-07-03T16:34:52.249482+08:00',
         '2013-07-03T16:34:52.249482-08:00',
     ]
     extracted = jsonutil.extract_dates(timestamps)
     ref = extracted[0]
     for dt in extracted:
         nt.assert_true(isinstance(dt, datetime.datetime))
         nt.assert_equal(dt, ref)
 
 def test_parse_ms_precision():
     base = '2013-07-03T16:34:52'
     digits = '1234567890'
 
     parsed = jsonutil.parse_date(base)
     nt.assert_is_instance(parsed, datetime.datetime)
     for i in range(len(digits)):
         ts = base + '.' + digits[:i]
         parsed = jsonutil.parse_date(ts)
         if i >= 1 and i <= 6:
             nt.assert_is_instance(parsed, datetime.datetime)
         else:
             nt.assert_is_instance(parsed, str)
 
 def test_date_default():
     data = dict(today=datetime.datetime.now(), utcnow=tz.utcnow())
     jsondata = json.dumps(data, default=jsonutil.date_default)
     nt.assert_in("+00", jsondata)
     nt.assert_equal(jsondata.count("+00"), 1)
     extracted = jsonutil.extract_dates(json.loads(jsondata))
     for dt in extracted.values():
         nt.assert_is_instance(dt, datetime.datetime)
 
 def test_exception():
     bad_dicts = [{1:'number', '1':'string'},
                  {True:'bool', 'True':'string'},
                  ]
     for d in bad_dicts:
         nt.assert_raises(ValueError, json_clean, d)
 
 def test_unicode_dict():
     data = {u'üniço∂e': u'üniço∂e'}
     clean = jsonutil.json_clean(data)
     nt.assert_equal(data, clean)
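
The test hunk above adds `test_rekey`, which pushes 128 string keys through `jsonutil.rekey` and asserts that every key comes back as an int; per its own comment, the old implementation could fail here by modifying the dict keys in place on Python 3. For orientation, here is a hypothetical usage sketch (not part of the diff) of the helpers the rest of the suite exercises: `date_default` packs datetimes on the way into `json.dumps`, and `extract_dates` recovers them after `json.loads`.

# Hypothetical round trip, not from the repository: combining the helpers
# exercised by test_date_default and test_extract_dates.
import json
from datetime import datetime
from IPython.utils import jsonutil

msg = {'sent': datetime(2013, 7, 3, 16, 34, 52, 249482), 'count': 1}

# date_default serializes datetime objects as ISO8601 strings...
wire = json.dumps(msg, default=jsonutil.date_default)

# ...and extract_dates walks the decoded structure and parses them back.
roundtripped = jsonutil.extract_dates(json.loads(wire))
assert roundtripped['sent'] == msg['sent']
assert roundtripped['count'] == 1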