make one strptime call at import of jsonutil...
MinRK
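Background for the change: `datetime.strptime` imports the `_strptime` module lazily on first use, and on Python 2 concurrent first calls from several threads can fail while that import is in flight. Below is a minimal sketch of the warm-up pattern (illustrative only, not part of the commit; the thread count and timestamp are arbitrary):

# Illustrative reproduction of the warm-up idea: one throwaway strptime call
# at import time populates sys.modules['_strptime'], so later calls from
# worker threads skip the racy first import (a known Python 2 issue).
from datetime import datetime
import threading

# warm-up call, same idea as the one added to jsonutil in this commit
datetime.strptime("1", "%d")

def parse_in_thread():
    # without the warm-up, simultaneous first calls could fail on Python 2
    return datetime.strptime("2014-01-01T00:00:00.000000",
                             "%Y-%m-%dT%H:%M:%S.%f")

threads = [threading.Thread(target=parse_in_thread) for _ in range(8)]
for t in threads:
    t.start()
for t in threads:
    t.join()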
@@ -1,259 +1,263 @@
1 """Utilities to manipulate JSON objects.
1 """Utilities to manipulate JSON objects.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010-2011 The IPython Development Team
4 # Copyright (C) 2010-2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING.txt, distributed as part of this software.
7 # the file COPYING.txt, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import math
14 import math
15 import re
15 import re
16 import types
16 import types
17 from datetime import datetime
17 from datetime import datetime
18
18
19 try:
19 try:
20 # base64.encodestring is deprecated in Python 3.x
20 # base64.encodestring is deprecated in Python 3.x
21 from base64 import encodebytes
21 from base64 import encodebytes
22 except ImportError:
22 except ImportError:
23 # Python 2.x
23 # Python 2.x
24 from base64 import encodestring as encodebytes
24 from base64 import encodestring as encodebytes
25
25
26 from IPython.utils import py3compat
26 from IPython.utils import py3compat
27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
28 from IPython.utils.encoding import DEFAULT_ENCODING
28 from IPython.utils.encoding import DEFAULT_ENCODING
29 next_attr_name = '__next__' if py3compat.PY3 else 'next'
29 next_attr_name = '__next__' if py3compat.PY3 else 'next'
30
30
31 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
32 # Globals and constants
32 # Globals and constants
33 #-----------------------------------------------------------------------------
33 #-----------------------------------------------------------------------------
34
34
35 # timestamp formats
35 # timestamp formats
36 ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
36 ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
37 ISO8601_PAT=re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$")
37 ISO8601_PAT=re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$")
38
38
39 # holy crap, strptime is not threadsafe.
40 # Calling it once at import seems to help.
41 datetime.strptime("1", "%d")
42
39 #-----------------------------------------------------------------------------
43 #-----------------------------------------------------------------------------
40 # Classes and functions
44 # Classes and functions
41 #-----------------------------------------------------------------------------
45 #-----------------------------------------------------------------------------
42
46
43 def rekey(dikt):
47 def rekey(dikt):
44 """Rekey a dict that has been forced to use str keys where there should be
48 """Rekey a dict that has been forced to use str keys where there should be
45 ints by json."""
49 ints by json."""
46 for k in dikt:
50 for k in dikt:
47 if isinstance(k, string_types):
51 if isinstance(k, string_types):
48 ik=fk=None
52 ik=fk=None
49 try:
53 try:
50 ik = int(k)
54 ik = int(k)
51 except ValueError:
55 except ValueError:
52 try:
56 try:
53 fk = float(k)
57 fk = float(k)
54 except ValueError:
58 except ValueError:
55 continue
59 continue
56 if ik is not None:
60 if ik is not None:
57 nk = ik
61 nk = ik
58 else:
62 else:
59 nk = fk
63 nk = fk
60 if nk in dikt:
64 if nk in dikt:
61 raise KeyError("already have key %r"%nk)
65 raise KeyError("already have key %r"%nk)
62 dikt[nk] = dikt.pop(k)
66 dikt[nk] = dikt.pop(k)
63 return dikt
67 return dikt
64
68
65 def parse_date(s):
69 def parse_date(s):
66 """parse an ISO8601 date string
70 """parse an ISO8601 date string
67
71
68 If it is None or not a valid ISO8601 timestamp,
72 If it is None or not a valid ISO8601 timestamp,
69 it will be returned unmodified.
73 it will be returned unmodified.
70 Otherwise, it will return a datetime object.
74 Otherwise, it will return a datetime object.
71 """
75 """
72 if s is None:
76 if s is None:
73 return s
77 return s
74 m = ISO8601_PAT.match(s)
78 m = ISO8601_PAT.match(s)
75 if m:
79 if m:
76 # FIXME: add actual timezone support
80 # FIXME: add actual timezone support
77 # this just drops the timezone info
81 # this just drops the timezone info
78 notz, ms, tz = m.groups()
82 notz, ms, tz = m.groups()
79 if not ms:
83 if not ms:
80 ms = '.0'
84 ms = '.0'
81 notz = notz + ms
85 notz = notz + ms
82 return datetime.strptime(notz, ISO8601)
86 return datetime.strptime(notz, ISO8601)
83 return s
87 return s
84
88
85 def extract_dates(obj):
89 def extract_dates(obj):
86 """extract ISO8601 dates from unpacked JSON"""
90 """extract ISO8601 dates from unpacked JSON"""
87 if isinstance(obj, dict):
91 if isinstance(obj, dict):
88 new_obj = {} # don't clobber
92 new_obj = {} # don't clobber
89 for k,v in iteritems(obj):
93 for k,v in iteritems(obj):
90 new_obj[k] = extract_dates(v)
94 new_obj[k] = extract_dates(v)
91 obj = new_obj
95 obj = new_obj
92 elif isinstance(obj, (list, tuple)):
96 elif isinstance(obj, (list, tuple)):
93 obj = [ extract_dates(o) for o in obj ]
97 obj = [ extract_dates(o) for o in obj ]
94 elif isinstance(obj, string_types):
98 elif isinstance(obj, string_types):
95 obj = parse_date(obj)
99 obj = parse_date(obj)
96 return obj
100 return obj
97
101
98 def squash_dates(obj):
102 def squash_dates(obj):
99 """squash datetime objects into ISO8601 strings"""
103 """squash datetime objects into ISO8601 strings"""
100 if isinstance(obj, dict):
104 if isinstance(obj, dict):
101 obj = dict(obj) # don't clobber
105 obj = dict(obj) # don't clobber
102 for k,v in iteritems(obj):
106 for k,v in iteritems(obj):
103 obj[k] = squash_dates(v)
107 obj[k] = squash_dates(v)
104 elif isinstance(obj, (list, tuple)):
108 elif isinstance(obj, (list, tuple)):
105 obj = [ squash_dates(o) for o in obj ]
109 obj = [ squash_dates(o) for o in obj ]
106 elif isinstance(obj, datetime):
110 elif isinstance(obj, datetime):
107 obj = obj.isoformat()
111 obj = obj.isoformat()
108 return obj
112 return obj
109
113
110 def date_default(obj):
114 def date_default(obj):
111 """default function for packing datetime objects in JSON."""
115 """default function for packing datetime objects in JSON."""
112 if isinstance(obj, datetime):
116 if isinstance(obj, datetime):
113 return obj.isoformat()
117 return obj.isoformat()
114 else:
118 else:
115 raise TypeError("%r is not JSON serializable"%obj)
119 raise TypeError("%r is not JSON serializable"%obj)
116
120
117
121
118 # constants for identifying png/jpeg data
122 # constants for identifying png/jpeg data
119 PNG = b'\x89PNG\r\n\x1a\n'
123 PNG = b'\x89PNG\r\n\x1a\n'
120 # front of PNG base64-encoded
124 # front of PNG base64-encoded
121 PNG64 = b'iVBORw0KG'
125 PNG64 = b'iVBORw0KG'
122 JPEG = b'\xff\xd8'
126 JPEG = b'\xff\xd8'
123 # front of JPEG base64-encoded
127 # front of JPEG base64-encoded
124 JPEG64 = b'/9'
128 JPEG64 = b'/9'
125 # front of PDF base64-encoded
129 # front of PDF base64-encoded
126 PDF64 = b'JVBER'
130 PDF64 = b'JVBER'
127
131
128 def encode_images(format_dict):
132 def encode_images(format_dict):
129 """b64-encodes images in a displaypub format dict
133 """b64-encodes images in a displaypub format dict
130
134
131 Perhaps this should be handled in json_clean itself?
135 Perhaps this should be handled in json_clean itself?
132
136
133 Parameters
137 Parameters
134 ----------
138 ----------
135
139
136 format_dict : dict
140 format_dict : dict
137 A dictionary of display data keyed by mime-type
141 A dictionary of display data keyed by mime-type
138
142
139 Returns
143 Returns
140 -------
144 -------
141
145
142 format_dict : dict
146 format_dict : dict
143 A copy of the same dictionary,
147 A copy of the same dictionary,
144 but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
148 but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
145 is base64-encoded.
149 is base64-encoded.
146
150
147 """
151 """
148 encoded = format_dict.copy()
152 encoded = format_dict.copy()
149
153
150 pngdata = format_dict.get('image/png')
154 pngdata = format_dict.get('image/png')
151 if isinstance(pngdata, bytes):
155 if isinstance(pngdata, bytes):
152 # make sure we don't double-encode
156 # make sure we don't double-encode
153 if not pngdata.startswith(PNG64):
157 if not pngdata.startswith(PNG64):
154 pngdata = encodebytes(pngdata)
158 pngdata = encodebytes(pngdata)
155 encoded['image/png'] = pngdata.decode('ascii')
159 encoded['image/png'] = pngdata.decode('ascii')
156
160
157 jpegdata = format_dict.get('image/jpeg')
161 jpegdata = format_dict.get('image/jpeg')
158 if isinstance(jpegdata, bytes):
162 if isinstance(jpegdata, bytes):
159 # make sure we don't double-encode
163 # make sure we don't double-encode
160 if not jpegdata.startswith(JPEG64):
164 if not jpegdata.startswith(JPEG64):
161 jpegdata = encodebytes(jpegdata)
165 jpegdata = encodebytes(jpegdata)
162 encoded['image/jpeg'] = jpegdata.decode('ascii')
166 encoded['image/jpeg'] = jpegdata.decode('ascii')
163
167
164 pdfdata = format_dict.get('application/pdf')
168 pdfdata = format_dict.get('application/pdf')
165 if isinstance(pdfdata, bytes):
169 if isinstance(pdfdata, bytes):
166 # make sure we don't double-encode
170 # make sure we don't double-encode
167 if not pdfdata.startswith(PDF64):
171 if not pdfdata.startswith(PDF64):
168 pdfdata = encodebytes(pdfdata)
172 pdfdata = encodebytes(pdfdata)
169 encoded['application/pdf'] = pdfdata.decode('ascii')
173 encoded['application/pdf'] = pdfdata.decode('ascii')
170
174
171 return encoded
175 return encoded
172
176
173
177
174 def json_clean(obj):
178 def json_clean(obj):
175 """Clean an object to ensure it's safe to encode in JSON.
179 """Clean an object to ensure it's safe to encode in JSON.
176
180
177 Atomic, immutable objects are returned unmodified. Sets and tuples are
181 Atomic, immutable objects are returned unmodified. Sets and tuples are
178 converted to lists, lists are copied and dicts are also copied.
182 converted to lists, lists are copied and dicts are also copied.
179
183
180 Note: dicts whose keys could cause collisions upon encoding (such as a dict
184 Note: dicts whose keys could cause collisions upon encoding (such as a dict
181 with both the number 1 and the string '1' as keys) will cause a ValueError
185 with both the number 1 and the string '1' as keys) will cause a ValueError
182 to be raised.
186 to be raised.
183
187
184 Parameters
188 Parameters
185 ----------
189 ----------
186 obj : any python object
190 obj : any python object
187
191
188 Returns
192 Returns
189 -------
193 -------
190 out : object
194 out : object
191
195
192 A version of the input which will not cause an encoding error when
196 A version of the input which will not cause an encoding error when
193 encoded as JSON. Note that this function does not *encode* its inputs,
197 encoded as JSON. Note that this function does not *encode* its inputs,
194 it simply sanitizes it so that there will be no encoding errors later.
198 it simply sanitizes it so that there will be no encoding errors later.
195
199
196 Examples
200 Examples
197 --------
201 --------
198 >>> json_clean(4)
202 >>> json_clean(4)
199 4
203 4
200 >>> json_clean(list(range(10)))
204 >>> json_clean(list(range(10)))
201 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
205 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
202 >>> sorted(json_clean(dict(x=1, y=2)).items())
206 >>> sorted(json_clean(dict(x=1, y=2)).items())
203 [('x', 1), ('y', 2)]
207 [('x', 1), ('y', 2)]
204 >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
208 >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
205 [('x', 1), ('y', 2), ('z', [1, 2, 3])]
209 [('x', 1), ('y', 2), ('z', [1, 2, 3])]
206 >>> json_clean(True)
210 >>> json_clean(True)
207 True
211 True
208 """
212 """
209 # types that are 'atomic' and ok in json as-is.
213 # types that are 'atomic' and ok in json as-is.
210 atomic_ok = (unicode_type, type(None))
214 atomic_ok = (unicode_type, type(None))
211
215
212 # containers that we need to convert into lists
216 # containers that we need to convert into lists
213 container_to_list = (tuple, set, types.GeneratorType)
217 container_to_list = (tuple, set, types.GeneratorType)
214
218
215 if isinstance(obj, float):
219 if isinstance(obj, float):
216 # cast out-of-range floats to their reprs
220 # cast out-of-range floats to their reprs
217 if math.isnan(obj) or math.isinf(obj):
221 if math.isnan(obj) or math.isinf(obj):
218 return repr(obj)
222 return repr(obj)
219 return float(obj)
223 return float(obj)
220
224
221 if isinstance(obj, int):
225 if isinstance(obj, int):
222 # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
226 # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
223 if isinstance(obj, bool):
227 if isinstance(obj, bool):
224 # bools are ints, but we don't want to cast them to 0,1
228 # bools are ints, but we don't want to cast them to 0,1
225 return obj
229 return obj
226 return int(obj)
230 return int(obj)
227
231
228 if isinstance(obj, atomic_ok):
232 if isinstance(obj, atomic_ok):
229 return obj
233 return obj
230
234
231 if isinstance(obj, bytes):
235 if isinstance(obj, bytes):
232 return obj.decode(DEFAULT_ENCODING, 'replace')
236 return obj.decode(DEFAULT_ENCODING, 'replace')
233
237
234 if isinstance(obj, container_to_list) or (
238 if isinstance(obj, container_to_list) or (
235 hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
239 hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
236 obj = list(obj)
240 obj = list(obj)
237
241
238 if isinstance(obj, list):
242 if isinstance(obj, list):
239 return [json_clean(x) for x in obj]
243 return [json_clean(x) for x in obj]
240
244
241 if isinstance(obj, dict):
245 if isinstance(obj, dict):
242 # First, validate that the dict won't lose data in conversion due to
246 # First, validate that the dict won't lose data in conversion due to
243 # key collisions after stringification. This can happen with keys like
247 # key collisions after stringification. This can happen with keys like
244 # True and 'true' or 1 and '1', which collide in JSON.
248 # True and 'true' or 1 and '1', which collide in JSON.
245 nkeys = len(obj)
249 nkeys = len(obj)
246 nkeys_collapsed = len(set(map(str, obj)))
250 nkeys_collapsed = len(set(map(str, obj)))
247 if nkeys != nkeys_collapsed:
251 if nkeys != nkeys_collapsed:
248 raise ValueError('dict can not be safely converted to JSON: '
252 raise ValueError('dict can not be safely converted to JSON: '
249 'key collision would lead to dropped values')
253 'key collision would lead to dropped values')
250 # If all OK, proceed by making the new dict that will be json-safe
254 # If all OK, proceed by making the new dict that will be json-safe
251 out = {}
255 out = {}
252 for k,v in iteritems(obj):
256 for k,v in iteritems(obj):
253 out[str(k)] = json_clean(v)
257 out[str(k)] = json_clean(v)
254 return out
258 return out
255
259
256 # If we get here, we don't know how to handle the object, so we just get
260 # If we get here, we don't know how to handle the object, so we just get
257 # its repr and return that. This will catch lambdas, open sockets, class
261 # its repr and return that. This will catch lambdas, open sockets, class
258 # objects, and any other complicated contraption that json can't encode
262 # objects, and any other complicated contraption that json can't encode
259 return repr(obj)
263 return repr(obj)
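For context, a rough usage sketch of how the helpers in this file fit together around `json.dumps`/`json.loads`. Illustrative only: the `IPython.utils.jsonutil` import path and the message layout are assumptions, not part of the diff.

# Illustrative round trip using the helpers shown above.
import json
from datetime import datetime
from IPython.utils.jsonutil import (
    date_default, extract_dates, json_clean, squash_dates,
)

msg = {'date': datetime.now(), 'content': {'data': (1, 2, 3)}}

# sending side: datetimes become ISO8601 strings, either eagerly via
# squash_dates or lazily via the date_default fallback passed to json.dumps
wire = json.dumps(squash_dates(msg), default=date_default)

# receiving side: extract_dates/parse_date turn ISO8601 strings back into
# datetime objects after json.loads (this is where strptime gets called)
unpacked = extract_dates(json.loads(wire))
assert isinstance(unpacked['date'], datetime)

# json_clean is the defensive pre-pass for arbitrary objects: tuples and
# sets become lists, keys become strings, unknown objects become reprs
safe = json_clean({'status': 'ok', 1: {'x', 'y'}})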