##// END OF EJS Templates
tweak regexp handling of tzinfo...
MinRK -
Show More
@@ -1,226 +1,228 b''
1 """Utilities to manipulate JSON objects.
1 """Utilities to manipulate JSON objects.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010-2011 The IPython Development Team
4 # Copyright (C) 2010-2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING.txt, distributed as part of this software.
7 # the file COPYING.txt, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import math
14 import math
15 import re
15 import re
16 import types
16 import types
17 from datetime import datetime
17 from datetime import datetime
18
18
19 try:
19 try:
20 # base64.encodestring is deprecated in Python 3.x
20 # base64.encodestring is deprecated in Python 3.x
21 from base64 import encodebytes
21 from base64 import encodebytes
22 except ImportError:
22 except ImportError:
23 # Python 2.x
23 # Python 2.x
24 from base64 import encodestring as encodebytes
24 from base64 import encodestring as encodebytes
25
25
26 from IPython.utils import py3compat
26 from IPython.utils import py3compat
27 from IPython.utils.encoding import DEFAULT_ENCODING
27 from IPython.utils.encoding import DEFAULT_ENCODING
28 next_attr_name = '__next__' if py3compat.PY3 else 'next'
28 next_attr_name = '__next__' if py3compat.PY3 else 'next'
29
29
30 #-----------------------------------------------------------------------------
30 #-----------------------------------------------------------------------------
31 # Globals and constants
31 # Globals and constants
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33
33
# timestamp formats
# strptime/strftime layout of the timestamps handled below: ISO8601 with
# fractional seconds and no timezone suffix.
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Group 1 captures the naive timestamp (the part that ISO8601 above can parse).
# An optional 'Z' and an optional UTC offset such as +01:00 or -0500 (group 2)
# may follow; they are matched but deliberately kept out of group 1.
ISO8601_PAT = re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+)Z?([\+\-]\d{2}:?\d{2})?$")
37
37
38 #-----------------------------------------------------------------------------
38 #-----------------------------------------------------------------------------
39 # Classes and functions
39 # Classes and functions
40 #-----------------------------------------------------------------------------
40 #-----------------------------------------------------------------------------
41
41
try:
    _REKEY_STRING_TYPES = (basestring,)  # Python 2: covers str and unicode
except NameError:
    _REKEY_STRING_TYPES = (str,)  # Python 3


def rekey(dikt):
    """Rekey a dict that has been forced to use str keys where there should be
    ints by json.

    String keys that parse as an int (or, failing that, as a float) are
    replaced by the parsed number.  Keys that are not strings, or that do not
    parse as a number, are left untouched.

    Parameters
    ----------
    dikt : dict
        The dictionary to rekey.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dictionary object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
    """
    # Iterate over a snapshot of the keys: the loop body pops and inserts
    # entries, which is not safe while iterating over the dict itself.
    for k in list(dikt):
        if not isinstance(k, _REKEY_STRING_TYPES):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not a number at all: keep the string key
                continue
        if nk in dikt:
            raise KeyError("already have key %r" % nk)
        dikt[nk] = dikt.pop(k)
    return dikt
63
63
64
64
try:
    _EXTRACT_DATES_STRING_TYPES = (basestring,)  # Python 2: str and unicode
except NameError:
    _EXTRACT_DATES_STRING_TYPES = (str,)  # Python 3


def extract_dates(obj):
    """extract ISO8601 dates from unpacked JSON

    Recursively walks dicts, lists and tuples and replaces every string that
    matches ISO8601_PAT with a naive ``datetime``.

    Parameters
    ----------
    obj : any python object
        Typically the result of unpacking a JSON message.

    Returns
    -------
    obj : object
        dicts come back as new dicts (the input is not clobbered), lists and
        tuples come back as new lists, matching strings come back as
        ``datetime`` objects, and anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's dict is not clobbered
        obj = dict((k, extract_dates(v)) for k, v in obj.items())
    elif isinstance(obj, (list, tuple)):
        obj = [extract_dates(o) for o in obj]
    elif isinstance(obj, _EXTRACT_DATES_STRING_TYPES):
        m = ISO8601_PAT.match(obj)
        if m:
            # FIXME: add actual timezone support
            # this just drops the timezone info: group 1 of the pattern is
            # the timestamp without any trailing 'Z' or UTC offset
            notz = m.group(1)
            obj = datetime.strptime(notz, ISO8601)
    return obj
79
81
def squash_dates(obj):
    """squash datetime objects into ISO8601 strings

    Recursively walks dicts, lists and tuples and replaces every ``datetime``
    with the string returned by its ``isoformat()`` method.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    obj : object
        dicts come back as new dicts (the input is not clobbered), lists and
        tuples come back as new lists, ``datetime`` objects come back as
        strings, and anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's dict is not clobbered
        obj = dict((k, squash_dates(v)) for k, v in obj.items())
    elif isinstance(obj, (list, tuple)):
        obj = [squash_dates(o) for o in obj]
    elif isinstance(obj, datetime):
        obj = obj.isoformat()
    return obj
91
93
def date_default(obj):
    """default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    str
        ``obj.isoformat()`` when obj is a ``datetime``.

    Raises
    ------
    TypeError
        For any object that is not a ``datetime``.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Wrap obj in a 1-tuple: with a bare ``% obj`` a tuple argument would be
    # unpacked as the format arguments instead of being shown as a whole.
    raise TypeError("%r is not JSON serializable" % (obj,))
98
100
99
101
# constants for identifying png/jpeg data
# leading bytes of raw PNG data
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
# leading bytes of raw JPEG data
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
107
109
108 def encode_images(format_dict):
110 def encode_images(format_dict):
109 """b64-encodes images in a displaypub format dict
111 """b64-encodes images in a displaypub format dict
110
112
111 Perhaps this should be handled in json_clean itself?
113 Perhaps this should be handled in json_clean itself?
112
114
113 Parameters
115 Parameters
114 ----------
116 ----------
115
117
116 format_dict : dict
118 format_dict : dict
117 A dictionary of display data keyed by mime-type
119 A dictionary of display data keyed by mime-type
118
120
119 Returns
121 Returns
120 -------
122 -------
121
123
122 format_dict : dict
124 format_dict : dict
123 A copy of the same dictionary,
125 A copy of the same dictionary,
124 but binary image data ('image/png' or 'image/jpeg')
126 but binary image data ('image/png' or 'image/jpeg')
125 is base64-encoded.
127 is base64-encoded.
126
128
127 """
129 """
128 encoded = format_dict.copy()
130 encoded = format_dict.copy()
129
131
130 pngdata = format_dict.get('image/png')
132 pngdata = format_dict.get('image/png')
131 if isinstance(pngdata, bytes):
133 if isinstance(pngdata, bytes):
132 # make sure we don't double-encode
134 # make sure we don't double-encode
133 if not pngdata.startswith(PNG64):
135 if not pngdata.startswith(PNG64):
134 pngdata = encodebytes(pngdata)
136 pngdata = encodebytes(pngdata)
135 encoded['image/png'] = pngdata.decode('ascii')
137 encoded['image/png'] = pngdata.decode('ascii')
136
138
137 jpegdata = format_dict.get('image/jpeg')
139 jpegdata = format_dict.get('image/jpeg')
138 if isinstance(jpegdata, bytes):
140 if isinstance(jpegdata, bytes):
139 # make sure we don't double-encode
141 # make sure we don't double-encode
140 if not jpegdata.startswith(JPEG64):
142 if not jpegdata.startswith(JPEG64):
141 jpegdata = encodebytes(jpegdata)
143 jpegdata = encodebytes(jpegdata)
142 encoded['image/jpeg'] = jpegdata.decode('ascii')
144 encoded['image/jpeg'] = jpegdata.decode('ascii')
143
145
144 return encoded
146 return encoded
145
147
146
148
try:
    _JSON_TEXT_TYPE = unicode  # Python 2
except NameError:
    _JSON_TEXT_TYPE = str  # Python 3: str is already the text type

# types that are 'atomic' and ok in json as-is.  bool doesn't need to be
# listed explicitly because bools pass as int instances
_JSON_ATOMIC_TYPES = (_JSON_TEXT_TYPE, int, type(None))

# containers that we need to convert into lists.  type(range(0)) is the lazy
# range class on Python 3 (and plain list on Python 2, which is harmless).
_JSON_LISTABLE_TYPES = (tuple, set, types.GeneratorType, type(range(0)))


def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Sets, tuples, ranges,
    generators and other iterators are converted to lists, lists are copied
    and dicts are also copied.  NaN and infinite floats, and any object of an
    unrecognized type, are replaced by their repr.  bytes are decoded with
    DEFAULT_ENCODING, replacing undecodable bytes.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
        If two keys of a dict have the same ``str()`` form.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(range(10))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    if isinstance(obj, float):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return obj

    if isinstance(obj, _JSON_ATOMIC_TYPES):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # known containers, plus anything that implements the iterator protocol
    if isinstance(obj, _JSON_LISTABLE_TYPES) or (
            hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification.  This can happen with keys
        # like 1 and '1' or True and 'True', which have the same str() form.
        nkeys = len(obj)
        nkeys_collapsed = len(set(map(str, obj)))
        if nkeys != nkeys_collapsed:
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        out = {}
        for k, v in obj.items():
            out[str(k)] = json_clean(v)
        return out

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that.  This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now