##// END OF EJS Templates
clean nan/inf in json_clean...
MinRK -
Show More
@@ -1,200 +1,212
1 """Utilities to manipulate JSON objects.
1 """Utilities to manipulate JSON objects.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010-2011 The IPython Development Team
4 # Copyright (C) 2010-2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING.txt, distributed as part of this software.
7 # the file COPYING.txt, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import re
14 import re
15 import sys
15 import sys
16 import types
16 import types
17 from base64 import encodestring
17 from base64 import encodestring
18 from datetime import datetime
18 from datetime import datetime
19
19
20 from IPython.utils import py3compat
20 from IPython.utils import py3compat
21 from IPython.utils.encoding import DEFAULT_ENCODING
21 from IPython.utils.encoding import DEFAULT_ENCODING
22 from IPython.utils import text
22 from IPython.utils import text
23 next_attr_name = '__next__' if py3compat.PY3 else 'next'
23 next_attr_name = '__next__' if py3compat.PY3 else 'next'
24
24
25 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
26 # Globals and constants
26 # Globals and constants
27 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
28
28
# timestamp formats
# strftime/strptime format used for every timestamp produced by this module.
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Recognizes strings written with the ISO8601 format above.  The fraction is
# limited to 1-6 digits because that is all ``%f`` can parse, and the pattern
# is anchored with ``\Z`` rather than ``$`` because ``$`` would also accept a
# string that ends with a newline, which strptime then rejects.
ISO8601_PAT = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}\Z")

# float constants
# Floating point values that have no representation in standard JSON.
NAN = float('nan')
INF = float('inf')
NINF = float('-inf')
INFS = (INF, NINF)
38
33 #-----------------------------------------------------------------------------
39 #-----------------------------------------------------------------------------
34 # Classes and functions
40 # Classes and functions
35 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
36
42
def rekey(dikt):
    """Restore numeric keys on a dict whose keys were stringified by JSON.

    JSON object keys are always strings, so a dict keyed by ints or floats
    comes back with string keys after a round trip.  Every string key that
    parses as an int (tried first) or as a float is replaced, in place, by the
    parsed number.  Keys that parse as neither, and keys that are not strings,
    are left untouched.

    Parameters
    ----------
    dikt : dict
        The dict to fix up.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dict object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
    """
    # (str, type(u'')) is equivalent to basestring on Python 2 and is plain
    # str on Python 3, so the check works unchanged on both.
    string_types = (str, type(u''))

    # Iterate over a snapshot of the keys: the loop body pops and inserts
    # entries, which is not safe while iterating over the dict itself.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not a number at all: keep the key as it is
                continue
        if nk in dikt:
            raise KeyError("already have key %r" % nk)
        dikt[nk] = dikt.pop(k)
    return dikt
58
64
59
65
def extract_dates(obj):
    """Convert ISO8601 timestamp strings in unpacked JSON into datetimes.

    Dicts and lists/tuples are walked recursively.  The input is never
    modified: dicts are rebuilt as new dicts and lists/tuples as new lists.

    Parameters
    ----------
    obj : any object
        Typically the result of unpacking a JSON message.

    Returns
    -------
    obj : object
        A datetime if `obj` is a string matching ISO8601_PAT that parses with
        the ISO8601 format, a converted copy if `obj` is a dict, list or
        tuple, and `obj` itself otherwise.
    """
    if isinstance(obj, dict):
        # build a new dict so the caller's object is not clobbered
        return dict((k, extract_dates(v)) for k, v in obj.items())
    if isinstance(obj, (list, tuple)):
        return [extract_dates(o) for o in obj]
    # (str, type(u'')) is basestring on Python 2 and plain str on Python 3
    if isinstance(obj, (str, type(u''))) and ISO8601_PAT.match(obj):
        try:
            return datetime.strptime(obj, ISO8601)
        except ValueError:
            # The string has the shape of a timestamp but is not a valid one
            # (e.g. month 13); leave it as a string instead of failing.
            return obj
    return obj
72
78
def squash_dates(obj):
    """Convert datetime objects inside a structure into ISO8601 strings.

    Dicts and lists/tuples are walked recursively.  The input is never
    modified: dicts are rebuilt as new dicts and lists/tuples as new lists.

    Parameters
    ----------
    obj : any object

    Returns
    -------
    obj : object
        The ISO8601-formatted string if `obj` is a datetime, a converted copy
        if `obj` is a dict, list or tuple, and `obj` itself otherwise.
    """
    if isinstance(obj, dict):
        # build a new dict so the caller's object is not clobbered
        return dict((k, squash_dates(v)) for k, v in obj.items())
    if isinstance(obj, (list, tuple)):
        return [squash_dates(o) for o in obj]
    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)
    return obj
84
90
def date_default(obj):
    """Serialize datetime objects; meant to be used as a JSON `default` hook.

    Parameters
    ----------
    obj : any object
        The object to serialize.

    Returns
    -------
    str
        `obj` formatted with the ISO8601 format, when `obj` is a datetime.

    Raises
    ------
    TypeError
        If `obj` is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)
    # Wrap obj in a 1-tuple: with a bare ``% obj`` a tuple argument would be
    # unpacked as several format arguments and produce the wrong message.
    raise TypeError("%r is not JSON serializable" % (obj,))
91
97
92
98
# constants for identifying png/jpeg data
# Leading bytes of raw PNG and JPEG data; used to tell raw image bytes apart
# from data that must not be base64-encoded (again).
PNG = b'\x89PNG\r\n\x1a\n'
JPEG = b'\xff\xd8'


def encode_images(format_dict):
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary,
        but binary image data ('image/png' or 'image/jpeg')
        is base64-encoded.  Values that are not bytes, or that do not start
        with the PNG/JPEG signature, are copied unchanged.

    """
    # base64.encodestring was removed in Python 3.9; encodebytes is the same
    # function under its current name.  Fall back for Python 2.
    try:
        from base64 import encodebytes
    except ImportError:
        from base64 import encodestring as encodebytes

    encoded = format_dict.copy()

    pngdata = format_dict.get('image/png')
    if isinstance(pngdata, bytes) and pngdata[:8] == PNG:
        encoded['image/png'] = encodebytes(pngdata).decode('ascii')

    jpegdata = format_dict.get('image/jpeg')
    if isinstance(jpegdata, bytes) and jpegdata[:2] == JPEG:
        encoded['image/jpeg'] = encodebytes(jpegdata).decode('ascii')

    return encoded
125
131
126
132
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Sets, tuples,
    generators and other iterators are converted to lists, lists are copied
    and dicts are also copied.  NaN and infinite floats are replaced by their
    reprs ('nan', 'inf', '-inf'), bytes are decoded with DEFAULT_ENCODING
    (undecodable bytes are replaced), and any other object is replaced by its
    repr.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
        If two keys of a dict would become the same key once converted to
        strings.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(list(range(10)))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    >>> json_clean(float('inf'))
    'inf'
    """
    import math

    # type(u'') is unicode on Python 2 and str on Python 3
    text_type = type(u'')

    # types that are 'atomic' and ok in json as-is.  bool doesn't need to be
    # listed explicitly because bools pass as int instances
    atomic_ok = (text_type, int, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, types.GeneratorType)

    if isinstance(obj, float):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return obj

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    if isinstance(obj, container_to_list) or (
        hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        def key_text(k):
            # String keys are kept as they are: calling str() on a non-ASCII
            # unicode key raises UnicodeEncodeError on Python 2.  Every other
            # key is stringified.
            if isinstance(k, (str, text_type)):
                return k
            return str(k)

        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification, e.g. with the keys 1 and '1',
        # which collide in JSON.
        nkeys = len(obj)
        nkeys_collapsed = len(set(key_text(k) for k in obj))
        if nkeys != nkeys_collapsed:
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        out = {}
        for k, v in obj.items():
            out[key_text(k)] = json_clean(v)
        return out

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that.  This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now