##// END OF EJS Templates
Backport PR #2194: clean nan/inf in json_clean...
MinRK -
Show More
@@ -1,200 +1,207 b''
1 1 """Utilities to manipulate JSON objects.
2 2 """
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (C) 2010-2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING.txt, distributed as part of this software.
8 8 #-----------------------------------------------------------------------------
9 9
10 10 #-----------------------------------------------------------------------------
11 11 # Imports
12 12 #-----------------------------------------------------------------------------
13 13 # stdlib
14 import math
14 15 import re
15 16 import sys
16 17 import types
17 18 from base64 import encodestring
18 19 from datetime import datetime
19 20
20 21 from IPython.utils import py3compat
21 22 from IPython.utils.encoding import DEFAULT_ENCODING
22 23 from IPython.utils import text
# Name of the method that advances an iterator on the running interpreter.
if py3compat.PY3:
    next_attr_name = '__next__'
else:
    next_attr_name = 'next'
24 25
25 26 #-----------------------------------------------------------------------------
26 27 # Globals and constants
27 28 #-----------------------------------------------------------------------------
28 29
# timestamp formats: the strptime/strftime layout, and a pattern recognising
# strings already written in that layout (fractional seconds are required)
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
ISO8601_PAT = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$"
)
32 33
33 34 #-----------------------------------------------------------------------------
34 35 # Classes and functions
35 36 #-----------------------------------------------------------------------------
36 37
def rekey(dikt):
    """Rekey a dict that has been forced to use str keys where there should be
    ints by json.

    String keys that parse as an int (or, failing that, as a float) are
    replaced in place by the parsed number.  Keys that are not strings, and
    strings that are not numeric, are left alone.

    Parameters
    ----------
    dikt : dict
        The dictionary to fix up.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dictionary object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
    """
    try:
        string_types = basestring
    except NameError:
        # no basestring on Python 3; str is the text type there
        string_types = str

    # Walk a snapshot of the keys: the loop body pops and inserts entries,
    # which is not safe while iterating over the live dict.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not numeric at all, keep the key as it is
                continue
        if nk in dikt:
            raise KeyError("already have key %r"%nk)
        dikt[nk] = dikt.pop(k)
    return dikt
58 59
59 60
def extract_dates(obj):
    """Turn ISO8601 timestamp strings found in unpacked JSON into datetimes.

    Dicts are copied and their values converted recursively; lists and
    tuples come back as new lists of converted items; strings matching
    ISO8601_PAT are parsed with the ISO8601 format.  Anything else is
    returned as it was given.
    """
    if isinstance(obj, dict):
        # work on a copy so the caller's dict is not clobbered
        converted = dict(obj)
        for key, value in converted.iteritems():
            converted[key] = extract_dates(value)
        return converted

    if isinstance(obj, (list, tuple)):
        return [extract_dates(item) for item in obj]

    if isinstance(obj, basestring) and ISO8601_PAT.match(obj):
        return datetime.strptime(obj, ISO8601)

    return obj
72 73
def squash_dates(obj):
    """Replace datetime objects with their ISO8601 string form.

    Dicts are copied and their values squashed recursively; lists and tuples
    come back as new lists of squashed items; datetimes are formatted with
    the ISO8601 format.  Anything else is returned as it was given.
    """
    if isinstance(obj, dict):
        # work on a copy so the caller's dict is not clobbered
        squashed = dict(obj)
        for key, value in squashed.iteritems():
            squashed[key] = squash_dates(value)
        return squashed

    if isinstance(obj, (list, tuple)):
        return [squash_dates(item) for item in obj]

    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)

    return obj
84 85
def date_default(obj):
    """default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : any python object
        The object the JSON encoder could not serialize on its own.

    Returns
    -------
    str
        The ISO8601-formatted timestamp, when `obj` is a datetime.

    Raises
    ------
    TypeError
        If `obj` is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)
    # Wrap obj in a 1-tuple: a bare tuple on the right of % would be unpacked
    # as the format operands instead of being shown whole.
    raise TypeError("%r is not JSON serializable" % (obj,))
91 92
92 93
# leading bytes used to recognise raw png / jpeg image data
PNG = b'\x89PNG\r\n\x1a\n'
JPEG = b'\xff\xd8'
96 97
def encode_images(format_dict):
    """Base64-encode raw image data in a displaypub format dict.

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary, in which 'image/png' and
        'image/jpeg' entries holding bytes that start with the matching
        PNG / JPEG signature have been replaced by base64 text.

    """
    encoded = format_dict.copy()
    # each mime-type is paired with the signature its raw data must start with
    for mimetype, signature in (('image/png', PNG), ('image/jpeg', JPEG)):
        data = format_dict.get(mimetype)
        if not isinstance(data, bytes):
            continue
        if data[:len(signature)] == signature:
            encoded[mimetype] = encodestring(data).decode('ascii')
    return encoded
125 126
126 127
def json_clean(obj):
    """Sanitize an object so that encoding it as JSON will not fail.

    Atomic, immutable objects are returned unmodified, except for float
    nan/inf values, which are replaced by their repr.  Sets and tuples are
    converted to lists, lists are copied and dicts are also copied.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(range(10))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    # floats are handled first: nan and +/-inf are cast to their reprs,
    # every other float goes through untouched
    if isinstance(obj, float):
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return obj

    # types that are 'atomic' and ok in json as-is.  bool doesn't need to be
    # listed explicitly because bools pass as int instances
    if isinstance(obj, (unicode, int, types.NoneType)):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # tuples, sets, generators and anything else that looks like an iterator
    # are turned into lists before their items are cleaned
    listable = isinstance(obj, (tuple, set, types.GeneratorType))
    if not listable:
        listable = hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)
    if listable:
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(item) for item in obj]

    if isinstance(obj, dict):
        # Keys are stringified below, so keys such as 1 and '1' would
        # collide and silently drop a value; refuse to convert such dicts.
        if len(obj) != len(set(map(str, obj))):
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        cleaned = {}
        for key, value in obj.iteritems():
            cleaned[str(key)] = json_clean(value)
        return cleaned

    # Anything that reaches this point is of a kind we don't know how to
    # handle (lambdas, open sockets, class objects, ...): fall back to repr
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now