##// END OF EJS Templates
Backport PR #6081: don’t modify dict keys while iterating through them...
MinRK -
Show More
@@ -1,243 +1,239 b''
1 1 """Utilities to manipulate JSON objects."""
2 2
3 3 # Copyright (c) IPython Development Team.
4 4 # Distributed under the terms of the Modified BSD License.
5 5
6 6 import math
7 7 import re
8 8 import types
9 9 from datetime import datetime
10 10
11 11 try:
12 12 # base64.encodestring is deprecated in Python 3.x
13 13 from base64 import encodebytes
14 14 except ImportError:
15 15 # Python 2.x
16 16 from base64 import encodestring as encodebytes
17 17
18 18 from IPython.utils import py3compat
19 19 from IPython.utils.py3compat import string_types, unicode_type, iteritems
20 20 from IPython.utils.encoding import DEFAULT_ENCODING
21 21 next_attr_name = '__next__' if py3compat.PY3 else 'next'
22 22
23 23 #-----------------------------------------------------------------------------
24 24 # Globals and constants
25 25 #-----------------------------------------------------------------------------
26 26
# timestamp formats
# strptime layout for a timestamp that includes fractional seconds
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Groups: (1) date and time down to the second, (2) optional fractional
# seconds of 1-6 digits including the leading dot, (3) optional +HH:MM / +HHMM
# offset.  An optional literal "Z" is accepted between groups 2 and 3.
ISO8601_PAT = re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$")

# holy crap, strptime is not threadsafe.
# Calling it once at import seems to help.
datetime.strptime("1", "%d")
34 34
35 35 #-----------------------------------------------------------------------------
36 36 # Classes and functions
37 37 #-----------------------------------------------------------------------------
38 38
def rekey(dikt):
    """Rekey a dict that has been forced to use str keys where there should be
    ints by json.

    String keys that parse as an int (tried first) or as a float are replaced
    by the parsed number; string keys that parse as neither, and non-string
    keys, are left untouched.

    Parameters
    ----------
    dikt : dict
        The dict to rekey.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dict object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
    """
    # Iterate over a snapshot of the keys: the loop body pops and inserts
    # keys, which is not allowed while iterating over the dict itself.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not a number at all, keep the key as it is
                continue
        if nk in dikt:
            raise KeyError("already have key %r" % nk)
        dikt[nk] = dikt.pop(k)
    return dikt
60 56
def parse_date(s):
    """parse an ISO8601 date string

    If it is None or not a valid ISO8601 timestamp,
    it will be returned unmodified.
    Otherwise, it will return a datetime object.

    Parameters
    ----------
    s : str or None
        Candidate timestamp, matched against ``ISO8601_PAT``.

    Returns
    -------
    datetime, or the input unchanged
        The returned datetime carries no timezone: any offset present in
        the string is discarded (see FIXME below).
    """
    if s is None:
        return s
    m = ISO8601_PAT.match(s)
    if not m:
        return s
    # FIXME: add actual timezone support
    # this just drops the timezone info
    notz, ms, tz = m.groups()
    # ISO8601 requires a fractional part, so supply one when it is absent
    if not ms:
        ms = '.0'
    return datetime.strptime(notz + ms, ISO8601)
80 76
def extract_dates(obj):
    """extract ISO8601 dates from unpacked JSON

    Recurses through dicts, lists and tuples and passes every string through
    parse_date.  The input is not modified: dicts are rebuilt as new dicts,
    and lists and tuples both come back as new lists.  Any other object is
    returned as-is.
    """
    if isinstance(obj, dict):
        new_obj = {}  # don't clobber
        for k, v in iteritems(obj):
            new_obj[k] = extract_dates(v)
        return new_obj
    if isinstance(obj, (list, tuple)):
        return [extract_dates(o) for o in obj]
    if isinstance(obj, string_types):
        return parse_date(obj)
    return obj
93 89
def squash_dates(obj):
    """squash datetime objects into ISO8601 strings

    Recurses through dicts, lists and tuples and replaces every datetime
    with its ``isoformat()`` string.  The input is not modified: dicts are
    copied, and lists and tuples both come back as new lists.  Any other
    object is returned as-is.
    """
    if isinstance(obj, dict):
        obj = dict(obj)  # don't clobber
        # only values of existing keys are reassigned, so the size of the
        # copy never changes during iteration
        for k, v in iteritems(obj):
            obj[k] = squash_dates(v)
    elif isinstance(obj, (list, tuple)):
        obj = [squash_dates(o) for o in obj]
    elif isinstance(obj, datetime):
        obj = obj.isoformat()
    return obj
105 101
def date_default(obj):
    """default function for packing datetime objects in JSON.

    Returns ``obj.isoformat()`` for datetime objects.

    Raises
    ------
    TypeError
        For any object that is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Wrap obj in a 1-tuple: a bare tuple on the right of % would be unpacked
    # as the format arguments and produce the wrong message.
    raise TypeError("%r is not JSON serializable" % (obj,))
112 108
113 109
# constants for identifying png/jpeg data
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
# front of PDF base64-encoded
PDF64 = b'JVBER'


def encode_images(format_dict):
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary,
        but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
        is base64-encoded.

    """
    encoded = format_dict.copy()

    # mime-type -> leading bytes that mark the payload as already base64
    b64_prefixes = (
        ('image/png', PNG64),
        ('image/jpeg', JPEG64),
        ('application/pdf', PDF64),
    )
    for mime, prefix in b64_prefixes:
        data = format_dict.get(mime)
        # missing entries and values that are not bytes are left untouched
        if not isinstance(data, bytes):
            continue
        # make sure we don't double-encode
        if not data.startswith(prefix):
            data = encodebytes(data)
        encoded[mime] = data.decode('ascii')

    return encoded
168 164
169 165
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Sets and tuples are
    converted to lists, lists are copied and dicts are also copied.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
      If two keys of a dict map to the same string.
    """
    # types that are 'atomic' and ok in json as-is.
    atomic_ok = (unicode_type, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, types.GeneratorType)

    if isinstance(obj, float):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return float(obj)

    if isinstance(obj, int):
        # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
        if isinstance(obj, bool):
            # bools are ints, but we don't want to cast them to 0,1
            return obj
        return int(obj)

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # known containers, plus anything that implements the iterator protocol
    if isinstance(obj, container_to_list) or (
            hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification.  This can happen with keys like
        # True and 'true' or 1 and '1', which collide in JSON.
        nkeys = len(obj)
        nkeys_collapsed = len(set(map(unicode_type, obj)))
        if nkeys != nkeys_collapsed:
            raise ValueError('dict cannot be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        out = {}
        for k, v in iteritems(obj):
            out[unicode_type(k)] = json_clean(v)
        return out

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that.  This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)
@@ -1,144 +1,151 b''
1 1 # coding: utf-8
2 2 """Test suite for our JSON utilities."""
3 3
4 4 # Copyright (c) IPython Development Team.
5 5 # Distributed under the terms of the Modified BSD License.
6 6
7 7 import datetime
8 8 import json
9 9 from base64 import decodestring
10 10
11 11 import nose.tools as nt
12 12
13 13 from IPython.utils import jsonutil, tz
14 14 from ..jsonutil import json_clean, encode_images
15 15 from ..py3compat import unicode_to_str, str_to_bytes, iteritems
16 16
17 17
class Int(int):
    """int subclass whose str() differs from the plain integer's.

    Used as a json_clean input that is expected to come back as a plain int.
    """

    def __str__(self):
        return 'Int(%i)' % self
21 21
def test():
    """json_clean yields the expected value and the result encodes as JSON."""
    # list of input/expected output.  Use None for the expected output if it
    # can be the same as the input.
    pairs = [(1, None),  # start with scalars
             (1.0, None),
             ('a', None),
             (True, None),
             (False, None),
             (None, None),
             # complex numbers for now just go to strings, as otherwise they
             # are unserializable
             (1j, '1j'),
             # Containers
             ([1, 2], None),
             ((1, 2), [1, 2]),
             (set([1, 2]), [1, 2]),
             (dict(x=1), None),
             ({'x': 1, 'y': [1, 2, 3], '1': 'int'}, None),
             # More exotic objects
             ((x for x in range(3)), [0, 1, 2]),
             (iter([1, 2]), [1, 2]),
             (Int(5), 5),
             ]

    for val, jval in pairs:
        if jval is None:
            jval = val
        out = json_clean(val)
        # validate our cleanup
        nt.assert_equal(out, jval)
        # and ensure that what we return, indeed encodes cleanly
        json.loads(json.dumps(out))
54 54
55 55
def test_rekey():
    """rekey turns every numeric string key into an int."""
    # This could fail due to modifying the dict keys in-place on Python 3
    d = {i: i for i in map(str, range(128))}
    d = jsonutil.rekey(d)
    for key in d:
        nt.assert_is_instance(key, int)
62
56 63
def test_encode_images():
    """encode_images base64-encodes raw bytes and leaves encoded data alone."""
    # invalid data, but the header and footer are from real files
    pngdata = b'\x89PNG\r\n\x1a\nblahblahnotactuallyvalidIEND\xaeB`\x82'
    jpegdata = b'\xff\xd8\xff\xe0\x00\x10JFIFblahblahjpeg(\xa0\x0f\xff\xd9'
    pdfdata = b'%PDF-1.\ntrailer<</Root<</Pages<</Kids[<</MediaBox[0 0 3 3]>>]>>>>>>'

    fmt = {
        'image/png': pngdata,
        'image/jpeg': jpegdata,
        'application/pdf': pdfdata
    }
    encoded = encode_images(fmt)
    for key, value in iteritems(fmt):
        # encoded has unicode, want bytes
        decoded = decodestring(encoded[key].encode('ascii'))
        nt.assert_equal(decoded, value)
    # encoding an already-encoded dict must not change it
    encoded2 = encode_images(encoded)
    nt.assert_equal(encoded, encoded2)

    b64_str = {}
    # distinct loop variable so the dict being iterated is not rebound
    for key, b64_value in iteritems(encoded):
        b64_str[key] = unicode_to_str(b64_value)
    encoded3 = encode_images(b64_str)
    nt.assert_equal(encoded3, b64_str)
    for key, value in iteritems(fmt):
        # encoded3 has str, want bytes
        decoded = decodestring(str_to_bytes(encoded3[key]))
        nt.assert_equal(decoded, value)
85 92
def test_lambda():
    """json_clean turns a lambda into a string that can be dumped as JSON."""
    jc = json_clean(lambda: 1)
    nt.assert_is_instance(jc, str)
    nt.assert_in('<lambda>', jc)
    json.dumps(jc)
91 98
def test_extract_dates():
    """Every supported spelling of one timestamp parses to the same datetime."""
    timestamps = [
        '2013-07-03T16:34:52.249482',
        '2013-07-03T16:34:52.249482Z',
        '2013-07-03T16:34:52.249482Z-0800',
        '2013-07-03T16:34:52.249482Z+0800',
        '2013-07-03T16:34:52.249482Z+08:00',
        '2013-07-03T16:34:52.249482Z-08:00',
        '2013-07-03T16:34:52.249482-0800',
        '2013-07-03T16:34:52.249482+0800',
        '2013-07-03T16:34:52.249482+08:00',
        '2013-07-03T16:34:52.249482-08:00',
    ]
    extracted = jsonutil.extract_dates(timestamps)
    ref = extracted[0]
    for dt in extracted:
        nt.assert_true(isinstance(dt, datetime.datetime))
        nt.assert_equal(dt, ref)
110 117
def test_parse_ms_precision():
    """parse_date accepts 1 to 6 fractional digits and rejects other counts."""
    base = '2013-07-03T16:34:52'
    digits = '1234567890'

    parsed = jsonutil.parse_date(base)
    nt.assert_is_instance(parsed, datetime.datetime)
    for i in range(len(digits)):
        # i == 0 leaves a bare trailing dot, which must not parse
        ts = base + '.' + digits[:i]
        parsed = jsonutil.parse_date(ts)
        if 1 <= i <= 6:
            nt.assert_is_instance(parsed, datetime.datetime)
        else:
            nt.assert_is_instance(parsed, str)
124 131
def test_date_default():
    """datetimes dumped with date_default come back through extract_dates."""
    data = dict(today=datetime.datetime.now(), utcnow=tz.utcnow())
    jsondata = json.dumps(data, default=jsonutil.date_default)
    # exactly one of the two serialized values carries a "+00" offset
    nt.assert_in("+00", jsondata)
    nt.assert_equal(jsondata.count("+00"), 1)
    extracted = jsonutil.extract_dates(json.loads(jsondata))
    for dt in extracted.values():
        nt.assert_is_instance(dt, datetime.datetime)
133 140
def test_exception():
    """json_clean raises ValueError for dicts whose keys collide as strings."""
    bad_dicts = [{1: 'number', '1': 'string'},
                 {True: 'bool', 'True': 'string'},
                 ]
    for d in bad_dicts:
        nt.assert_raises(ValueError, json_clean, d)
140 147
def test_unicode_dict():
    """A dict with non-ASCII unicode keys and values passes through unchanged."""
    data = {u'üniço∂e': u'üniço∂e'}
    clean = jsonutil.json_clean(data)
    nt.assert_equal(data, clean)
General Comments 0
You need to be logged in to leave comments. Login now