##// END OF EJS Templates
fix ISO8601 re...
MinRK -
Show More
@@ -1,229 +1,229
1 """Utilities to manipulate JSON objects.
1 """Utilities to manipulate JSON objects.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010-2011 The IPython Development Team
4 # Copyright (C) 2010-2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING.txt, distributed as part of this software.
7 # the file COPYING.txt, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import math
14 import math
15 import re
15 import re
16 import types
16 import types
17 from datetime import datetime
17 from datetime import datetime
18
18
19 try:
19 try:
20 # base64.encodestring is deprecated in Python 3.x
20 # base64.encodestring is deprecated in Python 3.x
21 from base64 import encodebytes
21 from base64 import encodebytes
22 except ImportError:
22 except ImportError:
23 # Python 2.x
23 # Python 2.x
24 from base64 import encodestring as encodebytes
24 from base64 import encodestring as encodebytes
25
25
26 from IPython.utils import py3compat
26 from IPython.utils import py3compat
27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
27 from IPython.utils.py3compat import string_types, unicode_type, iteritems
28 from IPython.utils.encoding import DEFAULT_ENCODING
28 from IPython.utils.encoding import DEFAULT_ENCODING
# Name of the method that advances an iterator on the running interpreter;
# json_clean uses it to recognise iterator objects.
if py3compat.PY3:
    next_attr_name = '__next__'
else:
    next_attr_name = 'next'
30
30
31 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
32 # Globals and constants
32 # Globals and constants
33 #-----------------------------------------------------------------------------
33 #-----------------------------------------------------------------------------
34
34
# timestamp formats

# strptime layout used to parse the timestamp captured by ISO8601_PAT.
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"

# Group 1 is the timestamp itself, group 2 an optional numeric UTC offset
# (a literal "Z" may also precede it).  The fractional part is limited to
# 1-6 digits because that is all strptime's %f directive accepts; a looser
# ``\d+`` would match strings that strptime then rejects with ValueError.
ISO8601_PAT = re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6})Z?([\+\-]\d{2}:?\d{2})?$")
38
38
39 #-----------------------------------------------------------------------------
39 #-----------------------------------------------------------------------------
40 # Classes and functions
40 # Classes and functions
41 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
42
42
def rekey(dikt):
    """Restore numeric keys in a dict whose keys were stringified by JSON.

    Every string key that parses as an int (or, failing that, as a float) is
    replaced by the parsed number.  Keys that are not strings, or that do not
    parse as a number, are left alone.

    Parameters
    ----------
    dikt : dict
        The dict to fix.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dict object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
    """
    # Iterate over a snapshot of the keys: the loop body pops and inserts
    # keys, and mutating a dict while iterating over it directly is unsafe
    # (entries can be skipped or revisited, or a RuntimeError raised).
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not a number at all: keep the string key
                continue
        if nk in dikt:
            raise KeyError("already have key %r"%nk)
        dikt[nk] = dikt.pop(k)
    return dikt
64
64
65
65
def extract_dates(obj):
    """Recursively convert ISO8601 timestamp strings in unpacked JSON.

    Dicts are shallow-copied and their values converted; lists and tuples
    become new lists of converted items; strings matching ISO8601_PAT are
    parsed into datetime objects.  Anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        converted = dict(obj)  # work on a copy, leave the caller's dict alone
        for key in converted:
            converted[key] = extract_dates(converted[key])
        return converted

    if isinstance(obj, (list, tuple)):
        return [extract_dates(item) for item in obj]

    if isinstance(obj, string_types):
        match = ISO8601_PAT.match(obj)
        if match is None:
            return obj
        # FIXME: add actual timezone support
        # Only group 1 (the timestamp without its timezone suffix) is parsed,
        # so any timezone info is dropped.
        return datetime.strptime(match.group(1), ISO8601)

    return obj
82
82
def squash_dates(obj):
    """Recursively replace datetime objects with their ISO8601 strings.

    Dicts are shallow-copied and their values converted; lists and tuples
    become new lists of converted items; datetime objects are replaced by
    ``isoformat()`` output.  Anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        squashed = dict(obj)  # work on a copy, leave the caller's dict alone
        for key in squashed:
            squashed[key] = squash_dates(squashed[key])
        return squashed

    if isinstance(obj, (list, tuple)):
        return [squash_dates(item) for item in obj]

    if isinstance(obj, datetime):
        return obj.isoformat()

    return obj
94
94
def date_default(obj):
    """Default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : object
        The object to serialize.

    Returns
    -------
    str
        ``obj.isoformat()`` when obj is a datetime.

    Raises
    ------
    TypeError
        If obj is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Wrap obj in a 1-tuple so that a tuple argument is formatted as a single
    # value instead of being unpacked as the format arguments.
    raise TypeError("%r is not JSON serializable" % (obj,))
101
101
102
102
# Magic prefixes used to recognise PNG/JPEG image data.

# leading bytes of raw PNG data
PNG = b'\x89PNG\r\n\x1a\n'
# leading bytes of PNG data that is already base64-encoded
PNG64 = b'iVBORw0KG'

# leading bytes of raw JPEG data
JPEG = b'\xff\xd8'
# leading bytes of JPEG data that is already base64-encoded
JPEG64 = b'/9'
110
110
def encode_images(format_dict):
    """Base64-encode the images in a displaypub format dict.

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary, in which 'image/png' and 'image/jpeg'
        values that were bytes are base64-encoded and decoded to ASCII text.
        Values that are not bytes are left as they are.

    """
    encoded = format_dict.copy()

    # (mime-type, prefix showing that the data is already base64-encoded)
    image_kinds = (
        ('image/png', PNG64),
        ('image/jpeg', JPEG64),
    )
    for mimetype, b64_prefix in image_kinds:
        data = format_dict.get(mimetype)
        if not isinstance(data, bytes):
            continue
        # make sure we don't double-encode
        if not data.startswith(b64_prefix):
            data = encodebytes(data)
        encoded[mimetype] = data.decode('ascii')

    return encoded
148
148
149
149
def json_clean(obj):
    """Sanitize an object so that it can be encoded as JSON without errors.

    Atomic values (text, ints, bools, None, finite floats) are returned
    as-is.  NaN and infinite floats are replaced by their repr, bytes are
    decoded to text, tuples, sets, generators and iterators are turned into
    lists, and lists and dicts are rebuilt with cleaned contents.  Any other
    object is replaced by its repr.

    Note: a dict whose keys collide once converted with ``str`` (such as a
    dict with both the number 1 and the string '1' as keys) causes a
    ValueError to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  This function does not *encode* its input, it only
      sanitizes it so that there will be no encoding errors later.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(list(range(10)))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    # Types that are fine in JSON as-is.  bool is not listed because bools
    # are int instances.
    atomic_ok = (unicode_type, int, type(None))

    # Containers that are converted into lists.
    container_to_list = (tuple, set, types.GeneratorType)

    if isinstance(obj, float):
        # out-of-range floats are replaced by their reprs
        out_of_range = math.isnan(obj) or math.isinf(obj)
        return repr(obj) if out_of_range else obj

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # Known containers, plus anything that looks like an iterator.
    if isinstance(obj, container_to_list) or (
        hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(item) for item in obj]

    if not isinstance(obj, dict):
        # We don't know how to handle the object, so fall back to its repr.
        # This catches lambdas, open sockets, class objects, and any other
        # complicated contraption that json can't encode.
        return repr(obj)

    # Refuse dicts that would lose data because distinct keys become the
    # same string (e.g. 1 and '1').
    stringified_keys = set(map(str, obj))
    if len(obj) != len(stringified_keys):
        raise ValueError('dict can not be safely converted to JSON: '
                         'key collision would lead to dropped values')

    # All keys are safe: build the json-safe dict.
    cleaned = {}
    for key, value in iteritems(obj):
        cleaned[str(key)] = json_clean(value)
    return cleaned
General Comments 0
You need to be logged in to leave comments. Login now