##// END OF EJS Templates
Merge pull request #769 from minrk/jsonclean...
Brian E. Granger -
r4720:14867dbb merge
parent child Browse files
Show More
@@ -1,157 +1,161 b''
1 """Utilities to manipulate JSON objects.
1 """Utilities to manipulate JSON objects.
2 """
2 """
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (C) 2010 The IPython Development Team
4 # Copyright (C) 2010 The IPython Development Team
5 #
5 #
6 # Distributed under the terms of the BSD License. The full license is in
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING.txt, distributed as part of this software.
7 # the file COPYING.txt, distributed as part of this software.
8 #-----------------------------------------------------------------------------
8 #-----------------------------------------------------------------------------
9
9
10 #-----------------------------------------------------------------------------
10 #-----------------------------------------------------------------------------
11 # Imports
11 # Imports
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # stdlib
13 # stdlib
14 import re
14 import re
15 import sys
15 import types
16 import types
16 from datetime import datetime
17 from datetime import datetime
17
18
18 #-----------------------------------------------------------------------------
19 #-----------------------------------------------------------------------------
19 # Globals and constants
20 # Globals and constants
20 #-----------------------------------------------------------------------------
21 #-----------------------------------------------------------------------------
21
22
# Timestamp handling: the strftime/strptime format used for datetimes, and a
# pattern recognising strings written in that format (fractional seconds are
# required by both).
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
ISO8601_PAT = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$")
25
26
26 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
27 # Classes and functions
28 # Classes and functions
28 #-----------------------------------------------------------------------------
29 #-----------------------------------------------------------------------------
29
30
def rekey(dikt):
    """Restore numeric keys in a dict whose keys were stringified by JSON.

    Every string key that parses as an int (tried first) or, failing that,
    as a float is replaced by the parsed number.  Keys that are not strings,
    or that do not parse as a number, are left untouched.

    The dict is modified in place and also returned.

    Raises
    ------
    KeyError
        If the numeric form of a string key is already present in the dict.
    """
    # Resolve the string base type at call time so the function works whether
    # or not the `basestring` builtin exists.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Iterate over a snapshot of the keys: the loop body pops and inserts
    # entries, and mutating a dict while iterating it directly can skip keys,
    # revisit them, or raise RuntimeError.
    for k in list(dikt):
        if not isinstance(k, string_types):
            continue
        try:
            nk = int(k)
        except ValueError:
            try:
                nk = float(k)
            except ValueError:
                # not a number at all: keep the key as it is
                continue
        if nk in dikt:
            raise KeyError("already have key %r"%nk)
        dikt[nk] = dikt.pop(k)
    return dikt
51
52
52
53
def extract_dates(obj):
    """Parse ISO8601 timestamp strings found in unpacked JSON into datetimes.

    Dicts are copied into a new dict with each value processed recursively;
    lists and tuples become a new list of processed items.  A string matching
    ISO8601_PAT is parsed with the ISO8601 format.  Anything else is returned
    unchanged.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's mapping is not clobbered
        return dict((key, extract_dates(value))
                    for key, value in obj.iteritems())
    if isinstance(obj, (list, tuple)):
        return [extract_dates(item) for item in obj]
    if isinstance(obj, basestring) and ISO8601_PAT.match(obj):
        return datetime.strptime(obj, ISO8601)
    return obj
65
66
def squash_dates(obj):
    """Format datetime objects found in a structure as ISO8601 strings.

    Dicts are copied into a new dict with each value processed recursively;
    lists and tuples become a new list of processed items.  A datetime is
    formatted with the ISO8601 format.  Anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        # build a fresh dict so the caller's mapping is not clobbered
        return dict((key, squash_dates(value))
                    for key, value in obj.iteritems())
    if isinstance(obj, (list, tuple)):
        return [squash_dates(item) for item in obj]
    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)
    return obj
77
78
def date_default(obj):
    """Default function for packing datetime objects in JSON.

    Returns the datetime formatted with the ISO8601 format.

    Raises
    ------
    TypeError
        If `obj` is not a datetime.
    """
    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)
    # Wrap obj in a 1-tuple: with a bare operand, a tuple argument would be
    # unpacked by the % operator and produce a wrong or unrelated message.
    raise TypeError("%r is not JSON serializable" % (obj,))
84
85
85
86
86
87
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Byte strings are
    decoded to text using the default encoding (undecodable bytes are
    replaced).  Sets, frozensets, tuples, generators and other iterators are
    converted to lists, lists are copied and dicts are also copied, with
    their keys converted to strings.  Anything else is replaced by its repr.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

    Raises
    ------
    ValueError
        If two keys of a dict become equal once converted to strings.

    Examples
    --------
    >>> json_clean(4)
    4
    >>> json_clean((0, 1, 2))
    [0, 1, 2]
    >>> sorted(json_clean(dict(x=1, y=2)).items())
    [('x', 1), ('y', 2)]
    >>> sorted(json_clean(dict(x=1, z=(1, 2, 3))).items())
    [('x', 1), ('z', [1, 2, 3])]
    >>> json_clean(True)
    True
    """
    # Resolve the text type at call time so the function works whether or not
    # the `unicode` builtin exists.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # types that are 'atomic' and ok in json as-is.  bool doesn't need to be
    # listed explicitly because bools pass as int instances
    atomic_ok = (text_type, int, float, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, frozenset, types.GeneratorType)

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        return obj.decode(sys.getdefaultencoding(), 'replace')

    # Besides the known containers, accept anything following the iterator
    # protocol under either of its method spellings (`next` / `__next__`).
    if isinstance(obj, container_to_list) or (
            hasattr(obj, '__iter__')
            and (hasattr(obj, 'next') or hasattr(obj, '__next__'))):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        def as_key(k):
            # Text keys are already JSON-safe; passing them through str()
            # could fail on non-ASCII text where str is a byte string type.
            if isinstance(k, text_type):
                return k
            return str(k)

        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification.  This can happen with keys
        # like 1 and '1', which collide in JSON.
        if len(set(as_key(k) for k in obj)) != len(obj):
            raise ValueError('dict can not be safely converted to JSON: '
                             'key collision would lead to dropped values')
        # If all OK, proceed by making the new dict that will be json-safe
        out = {}
        for k, v in obj.items():
            out[as_key(k)] = json_clean(v)
        return out

    # If we get here, we don't know how to handle the object, so we just get
    # its repr and return that.  This will catch lambdas, open sockets, class
    # objects, and any other complicated contraption that json can't encode
    return repr(obj)
General Comments 0
You need to be logged in to leave comments. Login now