##// END OF EJS Templates
make one strptime call at import of jsonutil...
MinRK -
Show More
@@ -1,259 +1,263 b''
1 1 """Utilities to manipulate JSON objects.
2 2 """
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (C) 2010-2011 The IPython Development Team
5 5 #
6 6 # Distributed under the terms of the BSD License. The full license is in
7 7 # the file COPYING.txt, distributed as part of this software.
8 8 #-----------------------------------------------------------------------------
9 9
10 10 #-----------------------------------------------------------------------------
11 11 # Imports
12 12 #-----------------------------------------------------------------------------
13 13 # stdlib
14 14 import math
15 15 import re
16 16 import types
17 17 from datetime import datetime
18 18
19 19 try:
20 20 # base64.encodestring is deprecated in Python 3.x
21 21 from base64 import encodebytes
22 22 except ImportError:
23 23 # Python 2.x
24 24 from base64 import encodestring as encodebytes
25 25
26 26 from IPython.utils import py3compat
27 27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
28 28 from IPython.utils.encoding import DEFAULT_ENCODING
# Name of the method that advances an iterator on the running interpreter:
# '__next__' when py3compat.PY3 is true, 'next' otherwise.
next_attr_name = '__next__' if py3compat.PY3 else 'next'
30 30
31 31 #-----------------------------------------------------------------------------
32 32 # Globals and constants
33 33 #-----------------------------------------------------------------------------
34 34
# timestamp formats
# strptime layout used to parse the timezone-less part of a timestamp:
# date, literal 'T', time, and a fractional-seconds field.
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Pattern recognising an ISO8601 timestamp string.
#   group 1: date and time down to the second
#   group 2: optional fractional seconds (a dot followed by 1-6 digits)
#   group 3: optional numeric UTC offset such as +01:00 or -0100
# A literal 'Z' is accepted between the fraction and the offset.
ISO8601_PAT=re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$")

# strptime is not threadsafe.
# Calling it once at import seems to help, so do one throwaway parse here
# before any other thread can call it.
datetime.strptime("1", "%d")
42
39 43 #-----------------------------------------------------------------------------
40 44 # Classes and functions
41 45 #-----------------------------------------------------------------------------
42 46
def rekey(dikt):
    """Restore numeric keys on a dict whose keys were stringified by JSON.

    Every string key that parses as an int (or, failing that, as a float) is
    replaced by the parsed number.  Keys that are not strings, or that do not
    parse as a number, are left untouched.

    Parameters
    ----------
    dikt : dict
        The dictionary to fix up.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dictionary object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
    """
    # Iterate over a snapshot of the keys: the loop body pops and inserts
    # keys, and mutating a dict while iterating over it directly may raise
    # RuntimeError or skip entries.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not numeric at all: keep the string key
                continue
        if nk in dikt:
            raise KeyError("already have key %r"%nk)
        dikt[nk] = dikt.pop(k)
    return dikt
64 68
def parse_date(s):
    """Convert an ISO8601 timestamp string into a datetime.

    None, and any string that does not look like an ISO8601 timestamp,
    is handed back unchanged.  A matching string is returned as a
    datetime object.
    """
    if s is None:
        return s
    match = ISO8601_PAT.match(s)
    if not match:
        return s
    # FIXME: add actual timezone support
    # for now the timezone part of the match is simply discarded
    stamp, fraction, _tz = match.groups()
    # the strptime format requires a fractional part, so supply one if absent
    return datetime.strptime(stamp + (fraction or '.0'), ISO8601)
84 88
def extract_dates(obj):
    """Walk unpacked JSON and turn ISO8601 date strings into datetimes.

    Dicts are rebuilt as new dicts and lists/tuples as new lists, so the
    input containers are never modified.  Strings go through parse_date;
    anything else is returned as-is.
    """
    if isinstance(obj, dict):
        # build a fresh dict rather than clobbering the caller's
        return {key: extract_dates(value) for key, value in iteritems(obj)}
    if isinstance(obj, (list, tuple)):
        return [extract_dates(item) for item in obj]
    if isinstance(obj, string_types):
        return parse_date(obj)
    return obj
97 101
def squash_dates(obj):
    """Walk a structure and turn datetime objects into ISO8601 strings.

    Dicts are copied and lists/tuples are rebuilt as new lists, so the
    input containers are never modified.  Objects that are neither
    containers nor datetimes are returned as-is.
    """
    if isinstance(obj, dict):
        # work on a copy rather than clobbering the caller's dict
        squashed = dict(obj)
        for key, value in iteritems(squashed):
            squashed[key] = squash_dates(value)
        return squashed
    if isinstance(obj, (list, tuple)):
        return [squash_dates(item) for item in obj]
    if isinstance(obj, datetime):
        return obj.isoformat()
    return obj
109 113
def date_default(obj):
    """Default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : object
        The object the JSON encoder could not serialize by itself.

    Returns
    -------
    str
        The ISO8601 representation of `obj` when it is a datetime.

    Raises
    ------
    TypeError
        If `obj` is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Wrap obj in a 1-tuple: a bare tuple on the right of % would be unpacked
    # as the argument list and produce a wrong or unrelated message.
    raise TypeError("%r is not JSON serializable" % (obj,))
116 120
117 121
# constants for identifying png/jpeg data
# leading bytes of raw PNG data
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
# leading bytes of raw JPEG data
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
# front of PDF base64-encoded
PDF64 = b'JVBER'

def encode_images(format_dict):
    """Base64-encode the binary images of a displaypub format dict.

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary,
        but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
        is base64-encoded and stored as an ascii-decoded string.
        Values that are not bytes are left as they are.

    """
    encoded = format_dict.copy()

    # each handled mime-type, paired with the prefix that marks data of that
    # type as already being base64-encoded
    known_types = (
        ('image/png', PNG64),
        ('image/jpeg', JPEG64),
        ('application/pdf', PDF64),
    )
    for mime, b64_prefix in known_types:
        data = format_dict.get(mime)
        if not isinstance(data, bytes):
            continue
        # make sure we don't double-encode
        if not data.startswith(b64_prefix):
            data = encodebytes(data)
        encoded[mime] = data.decode('ascii')

    return encoded
172 176
173 177
def json_clean(obj):
    """Sanitize an object so that encoding it as JSON cannot fail.

    Text, None, bools and finite numbers come back unchanged (int and float
    subclasses are cast to the plain type).  NaN and infinite floats are
    replaced by their repr.  Bytes are decoded to text.  Tuples, sets,
    generators and iterators become lists; lists and dicts are copied with
    their contents cleaned recursively.  Anything else is replaced by its
    repr.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(list(range(10)))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    if isinstance(obj, float):
        # out-of-range floats are cast to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return float(obj)

    if isinstance(obj, int):
        # bools are ints, but we don't want to cast them to 0,1;
        # every other int is cast to a plain int, in case a subclass
        # overrides __str__ (e.g. boost enum, #4598)
        return obj if isinstance(obj, bool) else int(obj)

    # text and None are 'atomic' and ok in json as-is
    if isinstance(obj, (unicode_type, type(None))):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # tuples, sets, generators and any other iterator are turned into lists
    if isinstance(obj, (tuple, set, types.GeneratorType)) or (
            hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(item) for item in obj]

    if isinstance(obj, dict):
        # Keys are stringified below.  Refuse dicts where that would make two
        # keys collide (e.g. 1 and '1'), since one of the values would
        # silently be lost.
        if len(obj) != len(set(map(str, obj))):
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # No collisions: build the json-safe copy
        cleaned = {}
        for key, value in iteritems(obj):
            safe_value = json_clean(value)
            cleaned[str(key)] = safe_value
        return cleaned

    # Unknown kind of object (lambdas, open sockets, class objects, ...):
    # json can't encode it, so fall back to its repr.
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now