##// END OF EJS Templates
fix png/jpeg b64-encoding check
MinRK -
Show More
@@ -1,222 +1,224 b''
1 """Utilities to manipulate JSON objects.
1 """Utilities to manipulate JSON objects.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010-2011 The IPython Development Team
4 # Copyright (C) 2010-2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING.txt, distributed as part of this software.
7 # the file COPYING.txt, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import math
14 import math
15 import re
15 import re
16 import types
16 import types
17 from datetime import datetime
17 from datetime import datetime
18
18
19 try:
19 try:
20 # base64.encodestring is deprecated in Python 3.x
20 # base64.encodestring is deprecated in Python 3.x
21 from base64 import encodebytes
21 from base64 import encodebytes
22 except ImportError:
22 except ImportError:
23 # Python 2.x
23 # Python 2.x
24 from base64 import encodestring as encodebytes
24 from base64 import encodestring as encodebytes
25
25
26 from IPython.utils import py3compat
26 from IPython.utils import py3compat
27 from IPython.utils.encoding import DEFAULT_ENCODING
27 from IPython.utils.encoding import DEFAULT_ENCODING
28 next_attr_name = '__next__' if py3compat.PY3 else 'next'
28 next_attr_name = '__next__' if py3compat.PY3 else 'next'
29
29
30 #-----------------------------------------------------------------------------
30 #-----------------------------------------------------------------------------
31 # Globals and constants
31 # Globals and constants
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33
33
# timestamp formats
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Only accept what strptime's %f directive can parse (1 to 6 fractional
# digits), and anchor with \Z instead of $ so that a trailing newline does not
# match.  Otherwise a string could pass this check and then make
# datetime.strptime(s, ISO8601) raise ValueError.
ISO8601_PAT = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{1,6}\Z")
37
37
38 #-----------------------------------------------------------------------------
38 #-----------------------------------------------------------------------------
39 # Classes and functions
39 # Classes and functions
40 #-----------------------------------------------------------------------------
40 #-----------------------------------------------------------------------------
41
41
def rekey(dikt):
    """Rekey a dict that has been forced to use str keys where there should be
    ints by json.

    String keys that parse as an int (or, failing that, as a float) are
    replaced by the parsed number.  Keys that are not strings, or that do not
    parse as a number, are left untouched.

    Parameters
    ----------
    dikt : dict
        The dictionary to fix up.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dictionary object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present as a key.
    """
    # (str, unicode) on Python 2, (str, str) on Python 3
    string_types = (str, type(u""))
    # Iterate over a snapshot of the keys: entries are popped and re-inserted
    # below, which is not safe while iterating over the live dict.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not a number at all, keep the key as it is
                continue
        if nk in dikt:
            raise KeyError("already have key %r" % nk)
        dikt[nk] = dikt.pop(k)
    return dikt
63
63
64
64
def extract_dates(obj):
    """extract ISO8601 dates from unpacked JSON

    Dicts, lists and tuples are walked recursively and copied rather than
    modified; lists and tuples both come back as lists.  Strings that match
    ISO8601_PAT are parsed into datetime objects using the ISO8601 format.
    Anything else is returned unchanged.

    Parameters
    ----------
    obj : any python object
        Typically the result of decoding a JSON message.

    Returns
    -------
    obj : object
        A copy of the input in which ISO8601 strings are datetime objects.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's dict is not clobbered
        extracted = {}
        for k, v in obj.items():
            extracted[k] = extract_dates(v)
        return extracted
    if isinstance(obj, (list, tuple)):
        return [extract_dates(o) for o in obj]
    # (str, type(u"")) is (str, unicode) on Python 2 and (str, str) on Python 3
    if isinstance(obj, (str, type(u""))) and ISO8601_PAT.match(obj):
        return datetime.strptime(obj, ISO8601)
    return obj
77
77
def squash_dates(obj):
    """squash datetime objects into ISO8601 strings

    Dicts, lists and tuples are walked recursively and copied rather than
    modified; lists and tuples both come back as lists.  datetime objects are
    formatted with the ISO8601 format string.  Anything else is returned
    unchanged.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    obj : object
        A copy of the input in which datetime objects are ISO8601 strings.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's dict is not clobbered
        squashed = {}
        for k, v in obj.items():
            squashed[k] = squash_dates(v)
        return squashed
    if isinstance(obj, (list, tuple)):
        return [squash_dates(o) for o in obj]
    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)
    return obj
89
89
def date_default(obj):
    """default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : any python object
        The object the JSON encoder could not serialize on its own.

    Returns
    -------
    str
        The datetime formatted with the ISO8601 format string.

    Raises
    ------
    TypeError
        If `obj` is not a datetime instance.
    """
    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)
    # Wrap obj in a 1-tuple: with a bare `% obj`, a tuple argument would be
    # unpacked as the format arguments and produce the wrong message.
    raise TypeError("%r is not JSON serializable" % (obj,))
96
96
97
97
# constants for identifying png/jpeg data
PNG = b'\x89PNG\r\n\x1a\n'
# Front of PNG base64-encoded.  This is a fixed prefix rather than
# encodebytes(PNG): encoding the signature on its own ends in padding and a
# newline, which never appears at the start of a complete encoded image.
PNG64 = b'iVBORw0KG'
JPEG = b'\xff\xd8'
# Front of JPEG base64-encoded (same reasoning as for PNG64).
JPEG64 = b'/9'

def encode_images(format_dict):
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary,
        but binary image data ('image/png' or 'image/jpeg')
        is base64-encoded.  Bytes that already start with the base64
        prefix for their image type are only decoded to text, not encoded
        a second time.  Values that are not bytes are left untouched.

    """
    encoded = format_dict.copy()

    for mime, b64_prefix in (('image/png', PNG64), ('image/jpeg', JPEG64)):
        data = format_dict.get(mime)
        if isinstance(data, bytes):
            # make sure we don't double-encode
            if not data.startswith(b64_prefix):
                data = encodebytes(data)
            encoded[mime] = data.decode('ascii')

    return encoded
141
143
142
144
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Sets and tuples are
    converted to lists, lists are copied and dicts are also copied.

    Floats that are NaN or infinite are replaced by their repr, bytes are
    decoded with DEFAULT_ENCODING (undecodable bytes are replaced), and any
    object of an unknown type is replaced by its repr.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
        If two keys of a dict have the same str() form.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(list(range(10)))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    # types that are 'atomic' and ok in json as-is.  bool doesn't need to be
    # listed explicitly because bools pass as int instances.  type(u"") is
    # unicode on Python 2 and str on Python 3; type(None) is used because
    # types.NoneType is not available on every Python version.
    atomic_ok = (type(u""), int, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, types.GeneratorType)

    if isinstance(obj, float):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return obj

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # known containers, plus anything that looks like an iterator
    if isinstance(obj, container_to_list) or (
        hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification.  This can happen with keys like
        # True and 'True' or 1 and '1', which have the same str() form.
        nkeys = len(obj)
        nkeys_collapsed = len(set(map(str, obj)))
        if nkeys != nkeys_collapsed:
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        out = {}
        for k, v in obj.items():
            out[str(k)] = json_clean(v)
        return out

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that.  This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now