##// END OF EJS Templates
re-cast int/float subclasses to int/float in json_clean...
MinRK -
Show More
@@ -1,241 +1,247 b''
1 1 """Utilities to manipulate JSON objects.
2 2 """
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (C) 2010-2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING.txt, distributed as part of this software.
8 8 #-----------------------------------------------------------------------------
9 9
10 10 #-----------------------------------------------------------------------------
11 11 # Imports
12 12 #-----------------------------------------------------------------------------
13 13 # stdlib
14 14 import math
15 15 import re
16 16 import types
17 17 from datetime import datetime
18 18
19 19 try:
20 20 # base64.encodestring is deprecated in Python 3.x
21 21 from base64 import encodebytes
22 22 except ImportError:
23 23 # Python 2.x
24 24 from base64 import encodestring as encodebytes
25 25
26 26 from IPython.utils import py3compat
27 27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
28 28 from IPython.utils.encoding import DEFAULT_ENCODING
29 29 next_attr_name = '__next__' if py3compat.PY3 else 'next'
30 30
31 31 #-----------------------------------------------------------------------------
32 32 # Globals and constants
33 33 #-----------------------------------------------------------------------------
34 34
# strptime format used to parse the timestamp captured by ISO8601_PAT
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# group 1: date and time with 1-6 fractional-second digits
# group 2: optional +hh:mm / +hhmm offset (an optional 'Z' may precede it)
ISO8601_PAT = re.compile(
    r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6})Z?([\+\-]\d{2}:?\d{2})?$"
)
38 38
39 39 #-----------------------------------------------------------------------------
40 40 # Classes and functions
41 41 #-----------------------------------------------------------------------------
42 42
def rekey(dikt):
    """Rekey a dict that has been forced to use str keys where there should be
    ints by json.

    String keys that parse as an int (or, failing that, as a float) are
    replaced by the parsed number.  Keys that are not strings, or that do not
    parse as a number, are left alone.

    Parameters
    ----------
    dikt : dict
        The dictionary to fix up.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dictionary object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
    """
    # Iterate over a snapshot of the keys: the loop body pops and inserts
    # keys, and changing a dict while iterating over it directly can raise
    # RuntimeError or silently skip entries.
    for k in list(dikt):
        if isinstance(k, string_types):
            try:
                nk = int(k)
            except ValueError:
                try:
                    nk = float(k)
                except ValueError:
                    # not a number at all, keep the string key
                    continue
            if nk in dikt:
                raise KeyError("already have key %r"%nk)
            dikt[nk] = dikt.pop(k)
    return dikt
64 64
def parse_date(s):
    """Turn an ISO8601 timestamp string into a datetime object.

    ``None`` and any string that does not match ``ISO8601_PAT`` are handed
    back unmodified.  A matching string is parsed with the ``ISO8601``
    format and returned as a datetime.
    """
    if s is not None:
        match = ISO8601_PAT.match(s)
        if match is not None:
            # FIXME: add actual timezone support
            # only the first group is parsed, so the timezone info is dropped
            return datetime.strptime(match.group(1), ISO8601)
    return s
81 81
def extract_dates(obj):
    """Recursively replace ISO8601 strings in unpacked JSON with datetimes.

    Dicts are rebuilt as new dicts, lists and tuples are rebuilt as new
    lists, and strings are passed through ``parse_date``.  Anything else is
    returned as-is.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's object is not clobbered
        return {key: extract_dates(value) for key, value in iteritems(obj)}
    if isinstance(obj, (list, tuple)):
        return [extract_dates(item) for item in obj]
    if isinstance(obj, string_types):
        return parse_date(obj)
    return obj
94 94
def squash_dates(obj):
    """Recursively replace datetime objects with their ISO8601 strings.

    Dicts are copied, lists and tuples are rebuilt as new lists, and
    datetimes become the result of ``isoformat()``.  Anything else is
    returned as-is.
    """
    if isinstance(obj, dict):
        # work from a plain-dict copy so the caller's mapping is not clobbered
        snapshot = dict(obj)
        return {key: squash_dates(value) for key, value in iteritems(snapshot)}
    if isinstance(obj, (list, tuple)):
        return [squash_dates(item) for item in obj]
    if isinstance(obj, datetime):
        return obj.isoformat()
    return obj
106 106
def date_default(obj):
    """default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : object
        The object the encoder could not serialize on its own.

    Returns
    -------
    str
        ``obj.isoformat()`` when ``obj`` is a datetime.

    Raises
    ------
    TypeError
        If ``obj`` is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Wrap obj in a 1-tuple: a bare tuple on the right of % would be unpacked
    # as the format arguments and produce a wrong or misleading message.
    raise TypeError("%r is not JSON serializable" % (obj,))
113 113
114 114
# Leading bytes used to identify png/jpeg data, raw and base64-encoded.
# raw PNG data starts with:
PNG = b'\x89PNG\r\n\x1a\n'
# base64-encoded PNG data starts with:
PNG64 = b'iVBORw0KG'
# raw JPEG data starts with:
JPEG = b'\xff\xd8'
# base64-encoded JPEG data starts with:
JPEG64 = b'/9'
122 122
def encode_images(format_dict):
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary,
        but binary image data ('image/png' or 'image/jpeg')
        is base64-encoded.

    """
    encoded = format_dict.copy()

    # each image mime-type is paired with the prefix its base64 form starts with
    image_types = (('image/png', PNG64), ('image/jpeg', JPEG64))
    for mimetype, b64_prefix in image_types:
        payload = format_dict.get(mimetype)
        if not isinstance(payload, bytes):
            # missing, or not bytes: leave the copied entry alone
            continue
        # make sure we don't double-encode
        if not payload.startswith(b64_prefix):
            payload = encodebytes(payload)
        encoded[mimetype] = payload.decode('ascii')

    return encoded
160 160
161 161
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Sets and tuples are
    converted to lists, lists are copied and dicts are also copied.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
        If two keys of a dict have the same string form.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(list(range(10)))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    # imported here so this function does not depend on a file-level import
    import numbers

    # types that are 'atomic' and ok in json as-is.
    atomic_ok = (unicode_type, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, types.GeneratorType)

    if isinstance(obj, bool):
        # bools are ints, but we don't want to cast them to 0,1
        return obj

    if isinstance(obj, numbers.Integral):
        # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
        # Integral (rather than int) also covers Python 2 long and other
        # registered integer types, which would otherwise end up as a repr
        return int(obj)

    if isinstance(obj, numbers.Real):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return float(obj)

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    if isinstance(obj, container_to_list) or (
        hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification.  This can happen with keys like
        # True and 'True' or 1 and '1', which collide in JSON.
        # Keys are stringified with unicode_type rather than str, so that
        # non-ascii unicode keys do not raise UnicodeEncodeError on Python 2.
        nkeys = len(obj)
        nkeys_collapsed = len(set(map(unicode_type, obj)))
        if nkeys != nkeys_collapsed:
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        out = {}
        for k,v in iteritems(obj):
            out[unicode_type(k)] = json_clean(v)
        return out

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that.  This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now