##// END OF EJS Templates
tweak double-encode logic for image data...
MinRK -
Show More
@@ -1,211 +1,222 b''
1 """Utilities to manipulate JSON objects.
1 """Utilities to manipulate JSON objects.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010-2011 The IPython Development Team
4 # Copyright (C) 2010-2011 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING.txt, distributed as part of this software.
7 # the file COPYING.txt, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import math
14 import math
15 import re
15 import re
16 import types
16 import types
17 from datetime import datetime
17 from datetime import datetime
18
18
19 try:
19 try:
20 # base64.encodestring is deprecated in Python 3.x
20 # base64.encodestring is deprecated in Python 3.x
21 from base64 import encodebytes
21 from base64 import encodebytes
22 except ImportError:
22 except ImportError:
23 # Python 2.x
23 # Python 2.x
24 from base64 import encodestring as encodebytes
24 from base64 import encodestring as encodebytes
25
25
26 from IPython.utils import py3compat
26 from IPython.utils import py3compat
27 from IPython.utils.encoding import DEFAULT_ENCODING
27 from IPython.utils.encoding import DEFAULT_ENCODING
28 next_attr_name = '__next__' if py3compat.PY3 else 'next'
28 next_attr_name = '__next__' if py3compat.PY3 else 'next'
29
29
30 #-----------------------------------------------------------------------------
30 #-----------------------------------------------------------------------------
31 # Globals and constants
31 # Globals and constants
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33
33
34 # timestamp formats
34 # timestamp formats
35 ISO8601="%Y-%m-%dT%H:%M:%S.%f"
35 ISO8601="%Y-%m-%dT%H:%M:%S.%f"
36 ISO8601_PAT=re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$")
36 ISO8601_PAT=re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$")
37
37
38 #-----------------------------------------------------------------------------
38 #-----------------------------------------------------------------------------
39 # Classes and functions
39 # Classes and functions
40 #-----------------------------------------------------------------------------
40 #-----------------------------------------------------------------------------
41
41
def rekey(dikt):
    """Rekey a dict that has been forced to use str keys where there should be
    ints by json.

    Every string key that parses as an int (tried first) or, failing that, as
    a float is replaced by the parsed number.  Keys that are not strings, or
    that do not parse as a number, are left untouched.

    Parameters
    ----------
    dikt : dict
        The dictionary to fix up.  It is modified in place.

    Returns
    -------
    dikt : dict
        The same dictionary object that was passed in.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already a key of the dict.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3

    # Walk a snapshot of the keys: the loop body pops and inserts entries,
    # which is not safe while iterating over the dict itself.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not numeric at all, keep the key as it is
                continue
        if nk in dikt:
            raise KeyError("already have key %r" % nk)
        dikt[nk] = dikt.pop(k)
    return dikt
63
63
64
64
def extract_dates(obj):
    """Extract ISO8601 dates from unpacked JSON.

    Recursively walks dicts, lists and tuples.  Every string that matches
    ``ISO8601_PAT`` is parsed into a ``datetime`` using the ``ISO8601``
    format; everything else is returned unchanged.

    Parameters
    ----------
    obj : any
        Typically the result of decoding a JSON message.

    Returns
    -------
    obj : any
        Dicts come back as new dicts (the input is not clobbered), lists and
        tuples come back as new lists, matching strings as ``datetime``
        objects, and any other object as itself.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3

    if isinstance(obj, dict):
        # build a fresh dict so the caller's dict is not clobbered
        obj = dict((k, extract_dates(v)) for k, v in obj.items())
    elif isinstance(obj, (list, tuple)):
        obj = [extract_dates(o) for o in obj]
    elif isinstance(obj, string_types):
        if ISO8601_PAT.match(obj):
            obj = datetime.strptime(obj, ISO8601)
    return obj
77
77
def squash_dates(obj):
    """Squash datetime objects into ISO8601 strings.

    Recursively walks dicts, lists and tuples and replaces each ``datetime``
    with its ``strftime(ISO8601)`` representation.

    Parameters
    ----------
    obj : any
        The object to prepare for serialization.

    Returns
    -------
    obj : any
        Dicts come back as new dicts (the input is not clobbered), lists and
        tuples come back as new lists, ``datetime`` objects as strings, and
        any other object as itself.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's dict is not clobbered
        obj = dict((k, squash_dates(v)) for k, v in obj.items())
    elif isinstance(obj, (list, tuple)):
        obj = [squash_dates(o) for o in obj]
    elif isinstance(obj, datetime):
        obj = obj.strftime(ISO8601)
    return obj
89
89
def date_default(obj):
    """Default function for packing datetime objects in JSON.

    Parameters
    ----------
    obj : any
        The object the JSON encoder could not serialize by itself.

    Returns
    -------
    str
        ``obj.strftime(ISO8601)`` when `obj` is a ``datetime``.

    Raises
    ------
    TypeError
        For any object that is not a ``datetime``.
    """
    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)
    # Wrap obj in a 1-tuple: with a bare ``% obj`` a tuple argument would be
    # unpacked as the format arguments and produce a wrong or broken message.
    raise TypeError("%r is not JSON serializable" % (obj,))
96
96
97
97
# constants for identifying png/jpeg data
PNG = b'\x89PNG\r\n\x1a\n'
# Leading base64 characters shared by every encoded PNG.  This is a literal
# prefix rather than encodebytes(PNG): encoding the bare signature appends
# '=' padding and a newline, which never appear at that position in the
# encoding of a complete image.
PNG64 = b'iVBORw0KGg'
JPEG = b'\xff\xd8'
# Leading base64 characters shared by every encoded JPEG (same reasoning).
JPEG64 = b'/9'


def encode_images(format_dict):
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        A copy of the same dictionary,
        but binary image data ('image/png' or 'image/jpeg')
        is base64-encoded and stored as text.  Bytes that already start
        with the base64 form of the image signature are only decoded to
        text, so they are not encoded a second time.  Values that are not
        bytes are left untouched.

    """
    encoded = format_dict.copy()

    for mime, b64_prefix in (('image/png', PNG64), ('image/jpeg', JPEG64)):
        data = format_dict.get(mime)
        if not isinstance(data, bytes):
            continue
        # make sure we don't double-encode
        if not data.startswith(b64_prefix):
            data = encodebytes(data)
        encoded[mime] = data.decode('ascii')

    return encoded
130
141
131
142
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Sets and tuples are
    converted to lists, lists are copied and dicts are also copied.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
        If two keys of a dict have the same ``str()`` form.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean(tuple(range(10)))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, y=2, z=[1,2,3])).items())
    [('x', 1), ('y', 2), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3

    # types that are 'atomic' and ok in json as-is.  bool doesn't need to be
    # listed explicitly because bools pass as int instances.  type(None) is
    # used because types.NoneType is not available on every Python version.
    atomic_ok = (text_type, int, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, types.GeneratorType)

    if isinstance(obj, float):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return obj

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        # undecodable bytes are replaced rather than raising
        return obj.decode(DEFAULT_ENCODING, 'replace')

    # known containers and anything that looks like an iterator are
    # materialized as a list, then handled by the list branch below
    if isinstance(obj, container_to_list) or (
        hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification.  This can happen with keys like
        # True and 'True' or 1 and '1', which collide once passed through str.
        nkeys = len(obj)
        nkeys_collapsed = len(set(map(str, obj)))
        if nkeys != nkeys_collapsed:
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        out = {}
        for k, v in obj.items():
            out[str(k)] = json_clean(v)
        return out

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that.  This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now