##// END OF EJS Templates
don't modify dict keys while iterating through them...
MinRK -
Show More
@@ -1,263 +1,259 b''
1 1 """Utilities to manipulate JSON objects.
2 2 """
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (C) 2010-2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING.txt, distributed as part of this software.
8 8 #-----------------------------------------------------------------------------
9 9
10 10 #-----------------------------------------------------------------------------
11 11 # Imports
12 12 #-----------------------------------------------------------------------------
13 13 # stdlib
14 14 import math
15 15 import re
16 16 import types
17 17 from datetime import datetime
18 18
19 19 try:
20 20 # base64.encodestring is deprecated in Python 3.x
21 21 from base64 import encodebytes
22 22 except ImportError:
23 23 # Python 2.x
24 24 from base64 import encodestring as encodebytes
25 25
26 26 from IPython.utils import py3compat
27 27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
28 28 from IPython.utils.encoding import DEFAULT_ENCODING
29 29 next_attr_name = '__next__' if py3compat.PY3 else 'next'  # name of the iterator-advance method: '__next__' when py3compat.PY3 is true, 'next' otherwise
30 30
31 31 #-----------------------------------------------------------------------------
32 32 # Globals and constants
33 33 #-----------------------------------------------------------------------------
34 34
35 35 # timestamp formats: the strptime format string, and a regex that splits an ISO8601 string into (date-time, optional fractional seconds, optional UTC offset)
36 36 ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
37 37 ISO8601_PAT=re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$")
38 38
39 39 # datetime.strptime is not threadsafe.
40 40 # Calling it once at import time, before any threads use it, seems to help.
41 41 datetime.strptime("1", "%d")
42 42
43 43 #-----------------------------------------------------------------------------
44 44 # Classes and functions
45 45 #-----------------------------------------------------------------------------
46 46
def rekey(dikt):
    """Restore numeric keys of a dict whose keys were stringified by JSON.

    Every string key that parses as an int (tried first) or, failing that,
    as a float is replaced by the parsed number.  Keys that are not strings,
    or that do not parse as a number, are left untouched.

    Parameters
    ----------
    dikt : dict
        The dict to fix up.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dict object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
        Keys converted before the collision was found stay converted.
    """
    # Iterate over a snapshot of the keys: the loop body pops and inserts
    # keys, which must not happen while iterating over the dict itself.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not a number at all: keep the key as it is
                continue
        if nk in dikt:
            raise KeyError("already have key %r" % nk)
        dikt[nk] = dikt.pop(k)
    return dikt
68 64
def parse_date(s):
    """Turn an ISO8601 timestamp string into a datetime object.

    None, and any string that does not match the ISO8601 pattern, is handed
    back unchanged.  A matching string is returned as a naive datetime.
    """
    if s is None:
        return s

    match = ISO8601_PAT.match(s)
    if not match:
        return s

    # FIXME: add actual timezone support
    # the UTC-offset group is matched, but then simply thrown away
    stamp, fraction, _offset = match.groups()
    # the strptime format requires fractional seconds, so supply '.0'
    # when the timestamp has none
    return datetime.strptime(stamp + (fraction or '.0'), ISO8601)
88 84
def extract_dates(obj):
    """Recursively replace ISO8601 strings in unpacked JSON with datetimes.

    Dicts are rebuilt as new dicts, lists and tuples are rebuilt as lists,
    strings go through parse_date, and anything else is returned as-is.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's dict is not clobbered
        return dict(
            (key, extract_dates(value)) for key, value in iteritems(obj)
        )
    if isinstance(obj, (list, tuple)):
        return [extract_dates(item) for item in obj]
    if isinstance(obj, string_types):
        return parse_date(obj)
    return obj
101 97
def squash_dates(obj):
    """Recursively replace datetime objects with their ISO8601 strings.

    Dicts are shallow-copied before their values are converted, lists and
    tuples are rebuilt as lists, and anything else is returned as-is.
    """
    if isinstance(obj, dict):
        # work on a copy so the caller's dict is not clobbered
        squashed = dict(obj)
        for key in squashed:
            squashed[key] = squash_dates(squashed[key])
        return squashed
    if isinstance(obj, (list, tuple)):
        return [squash_dates(item) for item in obj]
    if isinstance(obj, datetime):
        return obj.isoformat()
    return obj
113 109
def date_default(obj):
    """Default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : object
        The object the JSON encoder could not serialize on its own.

    Returns
    -------
    str
        The ISO8601 representation (``obj.isoformat()``) of a datetime.

    Raises
    ------
    TypeError
        If obj is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Wrap obj in a 1-tuple: a bare tuple on the right of % would be unpacked
    # as the format arguments and produce a wrong or misleading message.
    raise TypeError("%r is not JSON serializable" % (obj,))
120 116
121 117
# constants for identifying png/jpeg data
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
# front of PDF base64-encoded
PDF64 = b'JVBER'

# mime-types that encode_images handles, each paired with the bytes that
# already-base64-encoded data of that type starts with
_B64_PREFIXES = (
    ('image/png', PNG64),
    ('image/jpeg', JPEG64),
    ('application/pdf', PDF64),
)


def encode_images(format_dict):
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary,
        but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
        is base64-encoded and decoded to an ascii string.
        Values that are not bytes are copied over unchanged.

    """
    encoded = format_dict.copy()

    for mime, b64_prefix in _B64_PREFIXES:
        data = format_dict.get(mime)
        if not isinstance(data, bytes):
            continue
        # make sure we don't double-encode
        if not data.startswith(b64_prefix):
            data = encodebytes(data)
        encoded[mime] = data.decode('ascii')

    return encoded
176 172
177 173
def json_clean(obj):
    """Sanitize an object so that encoding it as JSON cannot fail.

    Atomic, immutable objects come back unmodified.  Sets, tuples, generators
    and other iterators become lists; lists and dicts are copied, with their
    contents cleaned recursively.  Anything unrecognized is replaced by its
    repr.

    Note: a dict whose keys would collide once they are turned into strings
    (such as a dict with both the number 1 and the string '1' as keys) causes
    a ValueError to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  This function does not *encode* its input, it only
      sanitizes it so that there will be no encoding errors later.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(list(range(10)))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    if isinstance(obj, float):
        # out-of-range floats (nan, inf) are sent as their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return float(obj)

    # bools are ints, but they must not be cast to 0 or 1
    if isinstance(obj, bool):
        return obj

    if isinstance(obj, int):
        # cast to a plain int, in case subclasses override __str__
        # (e.g. boost enum, #4598)
        return int(obj)

    # unicode text and None are 'atomic' and fine in json as they are
    if isinstance(obj, (unicode_type, type(None))):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # tuples, sets, generators and anything that looks like an iterator
    # are turned into lists, then cleaned by the list branch below
    if isinstance(obj, (tuple, set, types.GeneratorType)) or (
            hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(item) for item in obj]

    if isinstance(obj, dict):
        # Refuse dicts that would lose data because distinct keys turn into
        # the same string, like 1 and '1'.
        if len(obj) != len(set(str(key) for key in obj)):
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # no collisions: build the json-safe copy
        return dict(
            (str(key), json_clean(value)) for key, value in iteritems(obj)
        )

    # Nothing above knew how to handle the object, so fall back to its repr.
    # This catches lambdas, open sockets, class objects, and any other
    # complicated contraption that json can't encode.
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now