##// END OF EJS Templates
allow datestamps to exclude microseconds...
MinRK -
Show More
@@ -1,256 +1,259 b''
1 """Utilities to manipulate JSON objects.
1 """Utilities to manipulate JSON objects.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010-2011 The IPython Development Team
4 # Copyright (C) 2010-2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING.txt, distributed as part of this software.
7 # the file COPYING.txt, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import math
14 import math
15 import re
15 import re
16 import types
16 import types
17 from datetime import datetime
17 from datetime import datetime
18
18
19 try:
19 try:
20 # base64.encodestring is deprecated in Python 3.x
20 # base64.encodestring is deprecated in Python 3.x
21 from base64 import encodebytes
21 from base64 import encodebytes
22 except ImportError:
22 except ImportError:
23 # Python 2.x
23 # Python 2.x
24 from base64 import encodestring as encodebytes
24 from base64 import encodestring as encodebytes
25
25
26 from IPython.utils import py3compat
26 from IPython.utils import py3compat
27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
28 from IPython.utils.encoding import DEFAULT_ENCODING
28 from IPython.utils.encoding import DEFAULT_ENCODING
# Name of the method that advances an iterator: '__next__' when
# py3compat.PY3 is true, 'next' otherwise.
next_attr_name = '__next__' if py3compat.PY3 else 'next'
30
30
31 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
32 # Globals and constants
32 # Globals and constants
33 #-----------------------------------------------------------------------------
33 #-----------------------------------------------------------------------------
34
34
# timestamp formats
# strptime layout for timestamps; the fractional-seconds field is mandatory here
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Capture groups:
#   1. date and time down to the second
#   2. optional fractional seconds, including the leading dot (1-6 digits)
#   3. optional UTC offset such as +01:00 or -0800
# A literal 'Z' is tolerated before the offset and is not captured.
ISO8601_PAT = re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$")
38
38
39 #-----------------------------------------------------------------------------
39 #-----------------------------------------------------------------------------
40 # Classes and functions
40 # Classes and functions
41 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
42
42
def rekey(dikt):
    """Rekey a dict that has been forced to use str keys where there should be
    ints by json.

    String keys that parse as an int (or, failing that, a float) are replaced
    by the numeric value. Keys that are not strings, or that do not parse as a
    number, are left alone.

    Parameters
    ----------
    dikt : dict
        The dict to rekey. It is modified in place.

    Returns
    -------
    dikt : dict
        The same dict object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
    """
    # Walk a snapshot of the keys: the loop body pops and inserts keys, and
    # mutating a dict while iterating it directly can skip entries.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not numeric at all, keep the string key
                continue
        if nk in dikt:
            raise KeyError("already have key %r" % nk)
        dikt[nk] = dikt.pop(k)
    return dikt
64
64
def parse_date(s):
    """parse an ISO8601 date string

    If it is None or not a valid ISO8601 timestamp,
    it will be returned unmodified.
    Otherwise, it will return a datetime object.

    The fractional-seconds part of the timestamp is optional. Any UTC offset
    in the string is discarded, so the result is a naive datetime.
    """
    if s is None:
        return s
    m = ISO8601_PAT.match(s)
    if not m:
        return s
    # FIXME: add actual timezone support
    # this just drops the timezone info
    notz, ms, _tz = m.groups()
    if not ms:
        # the ISO8601 format string requires a fractional part
        ms = '.0'
    try:
        return datetime.strptime(notz + ms, ISO8601)
    except ValueError:
        # Right shape but out-of-range fields (e.g. month 13): not a valid
        # timestamp, so hand the string back as documented.
        return s
81
84
def extract_dates(obj):
    """extract ISO8601 dates from unpacked JSON

    Recurses through dicts, lists and tuples, passing every string it finds
    to parse_date. Dicts come back as new dicts and lists/tuples as new lists,
    so the input containers are not modified. Any other object is returned
    as-is.
    """
    if isinstance(obj, dict):
        # build a fresh dict: don't clobber the caller's
        return dict((key, extract_dates(value)) for key, value in iteritems(obj))
    if isinstance(obj, (list, tuple)):
        return [extract_dates(item) for item in obj]
    if isinstance(obj, string_types):
        return parse_date(obj)
    return obj
94
97
def squash_dates(obj):
    """squash datetime objects into ISO8601 strings

    Recurses through dicts, lists and tuples, replacing every datetime with
    the result of its isoformat() method. Dicts come back as new dicts and
    lists/tuples as new lists, so the input containers are not modified. Any
    other object is returned as-is.
    """
    if isinstance(obj, dict):
        # build a fresh dict: don't clobber the caller's
        return dict((key, squash_dates(value)) for key, value in iteritems(obj))
    if isinstance(obj, (list, tuple)):
        return [squash_dates(item) for item in obj]
    if isinstance(obj, datetime):
        return obj.isoformat()
    return obj
106
109
def date_default(obj):
    """default function for packing datetime objects in JSON.

    Returns the isoformat() string for a datetime.

    Raises
    ------
    TypeError
        If obj is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Wrap obj in a 1-tuple: a bare tuple on the right of % would be unpacked
    # as the format arguments and produce the wrong message.
    raise TypeError("%r is not JSON serializable" % (obj,))
113
116
114
117
# constants for identifying png/jpeg data
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
# front of PDF base64-encoded
PDF64 = b'JVBER'

def encode_images(format_dict):
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary,
        but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
        is base64-encoded.

    """
    encoded = format_dict.copy()

    # Each entry pairs a mime-type with the bytes that its base64 form starts
    # with; data already starting with that prefix is taken to be encoded.
    for mimetype, b64_prefix in (
        ('image/png', PNG64),
        ('image/jpeg', JPEG64),
        ('application/pdf', PDF64),
    ):
        data = format_dict.get(mimetype)
        if not isinstance(data, bytes):
            # missing, or not bytes: leave the copied value alone
            continue
        # make sure we don't double-encode
        if not data.startswith(b64_prefix):
            data = encodebytes(data)
        encoded[mimetype] = data.decode('ascii')

    return encoded
169
172
170
173
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Sets and tuples are
    converted to lists, lists are copied and dicts are also copied.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
        If two keys of a dict have the same str() form.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(list(range(10)))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    # types that are 'atomic' and ok in json as-is.
    atomic_ok = (unicode_type, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, types.GeneratorType)

    if isinstance(obj, float):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return float(obj)

    # bools are ints, but we don't want to cast them to 0,1
    if isinstance(obj, bool):
        return obj

    if isinstance(obj, int):
        # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
        return int(obj)

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # Known containers, plus anything that has both __iter__ and the
    # iterator-advance method, are materialised as a list.
    is_iterator = hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)
    if isinstance(obj, container_to_list) or is_iterator:
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification.  This can happen with keys like
        # True and 'true' or 1 and '1', which collide in JSON.
        if len(obj) != len(set(map(str, obj))):
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        return dict((str(k), json_clean(v)) for k, v in iteritems(obj))

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that.  This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now