##// END OF EJS Templates
re-cast int/float subclasses to int/float in json_clean...
MinRK -
Show More
@@ -1,241 +1,247 b''
1 """Utilities to manipulate JSON objects.
1 """Utilities to manipulate JSON objects.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010-2011 The IPython Development Team
4 # Copyright (C) 2010-2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING.txt, distributed as part of this software.
7 # the file COPYING.txt, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import math
14 import math
15 import re
15 import re
16 import types
16 import types
17 from datetime import datetime
17 from datetime import datetime
18
18
19 try:
19 try:
20 # base64.encodestring is deprecated in Python 3.x
20 # base64.encodestring is deprecated in Python 3.x
21 from base64 import encodebytes
21 from base64 import encodebytes
22 except ImportError:
22 except ImportError:
23 # Python 2.x
23 # Python 2.x
24 from base64 import encodestring as encodebytes
24 from base64 import encodestring as encodebytes
25
25
26 from IPython.utils import py3compat
26 from IPython.utils import py3compat
27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
28 from IPython.utils.encoding import DEFAULT_ENCODING
28 from IPython.utils.encoding import DEFAULT_ENCODING
29 next_attr_name = '__next__' if py3compat.PY3 else 'next'
29 next_attr_name = '__next__' if py3compat.PY3 else 'next'
30
30
31 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
32 # Globals and constants
32 # Globals and constants
33 #-----------------------------------------------------------------------------
33 #-----------------------------------------------------------------------------
34
34
# strptime format for timestamps that carry fractional seconds
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Matches ISO8601 timestamps with 1-6 fractional digits, an optional 'Z' and
# an optional numeric UTC offset.  Group 1 is the part without timezone info,
# group 2 is the offset (if any).
ISO8601_PAT = re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6})Z?([\+\-]\d{2}:?\d{2})?$")
38
38
39 #-----------------------------------------------------------------------------
39 #-----------------------------------------------------------------------------
40 # Classes and functions
40 # Classes and functions
41 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
42
42
def rekey(dikt):
    """Rekey a dict that has been forced to use str keys where there should be
    ints by json.

    String keys that parse as an int (or, failing that, as a float) are
    replaced by the corresponding numeric key.  All other keys are left alone.

    Parameters
    ----------
    dikt : dict
        The dict to rekey.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dict object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already a key of the dict.
    """
    # Iterate over a snapshot of the keys: the loop body pops and inserts
    # keys, and a dict must not be mutated while it is being iterated directly.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not a number: keep the key as it is
                continue
        if nk in dikt:
            raise KeyError("already have key %r"%nk)
        dikt[nk] = dikt.pop(k)
    return dikt
64
64
def parse_date(s):
    """parse an ISO8601 date string

    If it is None, not a string, or not a valid ISO8601 timestamp
    (as recognized by ISO8601_PAT), it will be returned unmodified.
    Otherwise, it will return a datetime object.

    Parameters
    ----------
    s : object
        The candidate timestamp.

    Returns
    -------
    datetime or object
        A naive datetime parsed from `s`, or `s` itself when it is not a
        matching timestamp string.
    """
    # Anything that is not a string (including None) cannot be a timestamp;
    # hand it back instead of letting the regex raise TypeError.
    if not isinstance(s, string_types):
        return s
    m = ISO8601_PAT.match(s)
    if m is None:
        return s
    # FIXME: add actual timezone support
    # this just drops the timezone info (only group 1 of the pattern is used)
    return datetime.strptime(m.group(1), ISO8601)
81
81
def extract_dates(obj):
    """Recursively turn ISO8601 strings in unpacked JSON into datetimes.

    Dicts are rebuilt as new dicts (the input is not clobbered), lists and
    tuples are rebuilt as lists, and strings are passed through `parse_date`.
    Any other object is returned as-is.
    """
    if isinstance(obj, dict):
        return {key: extract_dates(value) for key, value in iteritems(obj)}
    if isinstance(obj, (list, tuple)):
        return [extract_dates(item) for item in obj]
    if isinstance(obj, string_types):
        return parse_date(obj)
    return obj
94
94
def squash_dates(obj):
    """Recursively turn datetime objects into ISO8601 strings.

    Dicts are rebuilt as new dicts (the input is not clobbered), lists and
    tuples are rebuilt as lists, and datetimes are replaced by the result of
    their `isoformat()`.  Any other object is returned as-is.
    """
    if isinstance(obj, dict):
        return {key: squash_dates(value) for key, value in iteritems(obj)}
    if isinstance(obj, (list, tuple)):
        return [squash_dates(item) for item in obj]
    if isinstance(obj, datetime):
        return obj.isoformat()
    return obj
106
106
def date_default(obj):
    """default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : object
        The object the JSON encoder could not serialize on its own.

    Returns
    -------
    str
        The `isoformat()` of `obj` when it is a datetime.

    Raises
    ------
    TypeError
        If `obj` is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Wrap obj in a 1-tuple so that a tuple argument is formatted as a whole
    # instead of being unpacked as the %-format arguments.
    raise TypeError("%r is not JSON serializable" % (obj,))
113
113
114
114
# Byte signatures used to recognise image payloads.
# Leading bytes of raw PNG data:
PNG = b'\x89PNG\r\n\x1a\n'
# ...and the start of PNG data once it has been base64-encoded:
PNG64 = b'iVBORw0KG'
# Leading bytes of raw JPEG data:
JPEG = b'\xff\xd8'
# ...and the start of JPEG data once it has been base64-encoded:
JPEG64 = b'/9'
122
122
def encode_images(format_dict):
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A shallow copy of the same dictionary,
        but bytes image data ('image/png' or 'image/jpeg')
        is base64-encoded and decoded to an ascii string.

    """
    encoded = format_dict.copy()

    # (mime-type, prefix the payload starts with once it is base64-encoded)
    image_kinds = (
        ('image/png', PNG64),
        ('image/jpeg', JPEG64),
    )
    for mimetype, b64_prefix in image_kinds:
        data = format_dict.get(mimetype)
        if not isinstance(data, bytes):
            # missing, or already text: leave the entry untouched
            continue
        # make sure we don't double-encode
        if not data.startswith(b64_prefix):
            data = encodebytes(data)
        encoded[mimetype] = data.decode('ascii')

    return encoded
160
160
161
161
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified, except that instances
    of int and float subclasses are re-cast to plain int and float (bools are
    kept as bools) and nan/inf floats are replaced by their repr.  Bytes are
    decoded to text.  Sets, tuples, generators and iterators are converted to
    lists, lists are copied and dicts are also copied, with their keys
    converted to text.  Anything else is replaced by its repr.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
        If `obj` is (or contains) a dict in which two distinct keys have the
        same text form.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(list(range(10)))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> json_clean(dict(x=1, y=2)) == {'x': 1, 'y': 2}
    True
    >>> json_clean(dict(x=1, y=2, z=[1,2,3])) == {'x': 1, 'y': 2, 'z': [1, 2, 3]}
    True
    >>> json_clean(True)
    True
    """
    # types that are 'atomic' and ok in json as-is.
    atomic_ok = (unicode_type, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, types.GeneratorType)

    if isinstance(obj, float):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        # cast float to float, so that subclasses come out as plain floats
        return float(obj)

    if isinstance(obj, int):
        # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
        if isinstance(obj, bool):
            # bools are ints, but we don't want to cast them to 0,1
            return obj
        return int(obj)

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # generic iterators (anything with __iter__ and a next method) are
    # consumed into a list as well
    if isinstance(obj, container_to_list) or (
        hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification.  This can happen with keys like
        # 1 and '1', which collide in JSON.
        # unicode_type (rather than str) is used so that non-ascii unicode
        # keys do not raise UnicodeEncodeError on Python 2.
        nkeys = len(obj)
        nkeys_collapsed = len(set(map(unicode_type, obj)))
        if nkeys != nkeys_collapsed:
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        out = {}
        for k,v in iteritems(obj):
            out[unicode_type(k)] = json_clean(v)
        return out

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that.  This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now