##// END OF EJS Templates
cborutil: port to Python 3...
Augie Fackler -
r37915:2ae6a313 default
parent child Browse files
Show More
@@ -1,258 +1,262 b''
1 # cborutil.py - CBOR extensions
1 # cborutil.py - CBOR extensions
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from ..thirdparty.cbor.cbor2 import (
12 from ..thirdparty.cbor.cbor2 import (
13 decoder as decodermod,
13 decoder as decodermod,
14 )
14 )
15
15
16 # Very short overview of RFC 7049...
16 # Very short overview of RFC 7049...
17 #
17 #
18 # Each item begins with a byte. The 3 high bits of that byte denote the
18 # Each item begins with a byte. The 3 high bits of that byte denote the
19 # "major type." The lower 5 bits denote the "subtype." Each major type
19 # "major type." The lower 5 bits denote the "subtype." Each major type
20 # has its own encoding mechanism.
20 # has its own encoding mechanism.
21 #
21 #
22 # Most types have lengths. However, bytestring, string, array, and map
22 # Most types have lengths. However, bytestring, string, array, and map
23 # can be indefinite length. These are denoted by a subtype with value 31.
23 # can be indefinite length. These are denoted by a subtype with value 31.
24 # Sub-components of those types then come afterwards and are terminated
24 # Sub-components of those types then come afterwards and are terminated
25 # by a "break" byte.
25 # by a "break" byte.
26
26
# Major types: the value stored in the 3 high bits of an item's initial byte.
MAJOR_TYPE_UINT = 0
MAJOR_TYPE_NEGINT = 1
MAJOR_TYPE_BYTESTRING = 2
MAJOR_TYPE_STRING = 3
MAJOR_TYPE_ARRAY = 4
MAJOR_TYPE_MAP = 5
MAJOR_TYPE_SEMANTIC = 6
MAJOR_TYPE_SPECIAL = 7

# Mask selecting the low 5 bits (the "subtype") of an item's initial byte.
SUBTYPE_MASK = 0b00011111

SUBTYPE_HALF_FLOAT = 25
SUBTYPE_SINGLE_FLOAT = 26
SUBTYPE_DOUBLE_FLOAT = 27
SUBTYPE_INDEFINITE = 31

# Indefinite types begin with their major type ORed with information value 31.
BEGIN_INDEFINITE_BYTESTRING = struct.pack(
    r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE)
BEGIN_INDEFINITE_ARRAY = struct.pack(
    r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE)
BEGIN_INDEFINITE_MAP = struct.pack(
    r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE)

# Pre-compiled packers for an initial byte optionally followed by a 1, 2, 4
# or 8 byte big-endian unsigned integer.
ENCODED_LENGTH_1 = struct.Struct(r'>B')
ENCODED_LENGTH_2 = struct.Struct(r'>BB')
ENCODED_LENGTH_3 = struct.Struct(r'>BH')
ENCODED_LENGTH_4 = struct.Struct(r'>BL')
ENCODED_LENGTH_5 = struct.Struct(r'>BQ')

# The break ends an indefinite length item.
BREAK = b'\xff'
BREAK_INT = 255
60
60
def encodelength(majortype, length):
    """Obtain a value encoding the major type and its length.

    ``majortype`` is placed in the 3 high bits of the initial byte. A
    ``length`` below 24 is stored directly in the low 5 bits. Larger values
    use subtype 24, 25, 26 or 27 in the low bits, followed by ``length`` as
    a 1, 2, 4 or 8 byte big-endian unsigned integer respectively.

    Returns the packed bytes.
    """
    if length < 24:
        return ENCODED_LENGTH_1.pack(majortype << 5 | length)
    elif length < 256:
        return ENCODED_LENGTH_2.pack(majortype << 5 | 24, length)
    elif length < 65536:
        return ENCODED_LENGTH_3.pack(majortype << 5 | 25, length)
    elif length < 4294967296:
        return ENCODED_LENGTH_4.pack(majortype << 5 | 26, length)
    else:
        return ENCODED_LENGTH_5.pack(majortype << 5 | 27, length)
73
73
def streamencodebytestring(v):
    """Encode ``v`` as a definite length bytestring.

    Yields the encoded length header and then ``v`` itself, unmodified.
    """
    yield encodelength(MAJOR_TYPE_BYTESTRING, len(v))
    yield v
77
77
def streamencodebytestringfromiter(it):
    """Convert an iterator of chunks to an indefinite bytestring.

    Given an input that is iterable and each element in the iterator is
    representable as bytes, emit an indefinite length bytestring.

    Each element is emitted as its own definite length bytestring chunk
    and the sequence is terminated by a break byte.
    """
    yield BEGIN_INDEFINITE_BYTESTRING

    for chunk in it:
        yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
        yield chunk

    yield BREAK
91
91
def streamencodeindefinitebytestring(source, chunksize=65536):
    """Given a large source buffer, emit as an indefinite length bytestring.

    This is a generator of chunks constituting the encoded CBOR data.

    ``source`` is sliced into pieces of at most ``chunksize`` bytes and each
    piece is emitted as a definite length bytestring chunk. An empty
    ``source`` still produces a single zero-length chunk.

    Raises ``ValueError`` (when iteration starts) if ``chunksize`` is not
    positive.
    """
    # A non-positive chunk size can never advance through the buffer, so
    # the loop below would spin forever emitting empty chunks.
    if chunksize <= 0:
        raise ValueError('chunksize must be a positive integer')

    yield BEGIN_INDEFINITE_BYTESTRING

    offset = 0
    total = len(source)

    while True:
        chunk = source[offset:offset + chunksize]
        offset += len(chunk)

        yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
        yield chunk

        if offset >= total:
            break

    yield BREAK
113
113
def streamencodeint(v):
    """Encode an integer in the range ``-2**64 <= v < 2**64``.

    Non-negative values use the unsigned integer major type. Negative
    values use the negative integer major type with ``abs(v) - 1`` as the
    encoded value.

    Raises ``ValueError`` for integers outside the supported range.
    """
    if v >= 18446744073709551616 or v < -18446744073709551616:
        raise ValueError('big integers not supported')

    if v >= 0:
        yield encodelength(MAJOR_TYPE_UINT, v)
    else:
        yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
122
122
def streamencodearray(l):
    """Encode a known size iterable to an array.

    Yields the array header carrying ``len(l)`` followed by the encoded
    chunks of every element, in iteration order.
    """
    yield encodelength(MAJOR_TYPE_ARRAY, len(l))

    for item in l:
        for chunk in streamencode(item):
            yield chunk
131
131
def streamencodearrayfromiter(it):
    """Encode an iterator of items to an indefinite length array.

    Yields the indefinite array marker, the encoded chunks of every item
    and finally the break byte.
    """
    yield BEGIN_INDEFINITE_ARRAY

    for item in it:
        for chunk in streamencode(item):
            yield chunk

    yield BREAK
142
142
def _mixedtypesortkey(v):
    """Return a sort key that groups values by type name before value.

    Sorting on ``(type name, value)`` means values of different types are
    ordered by their type name, so the values themselves are only compared
    with other values of the same type.
    """
    return type(v).__name__, v
145
def streamencodeset(s):
    """Encode a set as a semantically tagged, sorted array.

    Members are sorted with ``_mixedtypesortkey`` so the output does not
    depend on set iteration order.
    """
    # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
    # semantic tag 258 for finite sets.
    yield encodelength(MAJOR_TYPE_SEMANTIC, 258)

    for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)):
        yield chunk
150
153
def streamencodemap(d):
    """Encode dictionary to a generator.

    Does not support indefinite length dictionaries.

    Entries are emitted sorted by key (using ``_mixedtypesortkey``), each
    as the encoded key followed by the encoded value.
    """
    yield encodelength(MAJOR_TYPE_MAP, len(d))

    # ``items()`` is available on both Python 2 and Python 3 dicts, whereas
    # ``iteritems()`` only exists on Python 2.
    for key, value in sorted(d.items(),
                             key=lambda x: _mixedtypesortkey(x[0])):
        for chunk in streamencode(key):
            yield chunk
        for chunk in streamencode(value):
            yield chunk
163
167
def streamencodemapfromiter(it):
    """Given an iterable of (key, value), encode to an indefinite length map.

    Pairs are emitted in iteration order; no sorting is performed here.
    """
    yield BEGIN_INDEFINITE_MAP

    for key, value in it:
        for chunk in streamencode(key):
            yield chunk
        for chunk in streamencode(value):
            yield chunk

    yield BREAK
175
179
def streamencodebool(b):
    """Encode a boolean as a single byte: 0xf5 if truthy, else 0xf4."""
    # major type 7, simple value 20 and 21.
    yield b'\xf5' if b else b'\xf4'
179
183
def streamencodenone(v):
    """Encode ``None`` as the single byte 0xf6; ``v`` itself is not used."""
    # major type 7, simple value 22.
    yield b'\xf6'
183
187
# Maps a Python type to the generator function that encodes values of
# that type.
STREAM_ENCODERS = {
    bytes: streamencodebytestring,
    int: streamencodeint,
    list: streamencodearray,
    tuple: streamencodearray,
    dict: streamencodemap,
    set: streamencodeset,
    bool: streamencodebool,
    type(None): streamencodenone,
}
194
198
def streamencode(v):
    """Encode a value in a streaming manner.

    Given an input object, encode it to CBOR recursively.

    Returns a generator of CBOR encoded bytes. There is no guarantee
    that each emitted chunk fully decodes to a value or sub-value.

    Encoding is deterministic - unordered collections are sorted.

    The encoder is looked up by the exact class of ``v``, so instances of
    subclasses of the supported types are not matched.

    Raises ``ValueError`` if no encoder is registered for the type.
    """
    fn = STREAM_ENCODERS.get(v.__class__)

    if fn is None:
        raise ValueError('do not know how to encode %s' % type(v))

    return fn(v)
211
215
def readindefinitebytestringtoiter(fh, expectheader=True):
    """Read an indefinite bytestring to a generator.

    Receives an object with a ``read(X)`` method to read N bytes.

    If ``expectheader`` is True, it is expected that the first byte read
    will represent an indefinite length bytestring. Otherwise, we
    expect the first byte to be part of the first bytestring chunk.

    Yields each chunk's payload as it is read.

    Raises ``CBORDecodeError`` if the header or a chunk header does not
    have the bytestring major type, if the header is not of indefinite
    length, or if a chunk is shorter than its declared length.
    """
    read = fh.read
    decodeuint = decodermod.decode_uint
    byteasinteger = decodermod.byte_as_integer

    if expectheader:
        initial = byteasinteger(read(1))

        majortype = initial >> 5
        subtype = initial & SUBTYPE_MASK

        if majortype != MAJOR_TYPE_BYTESTRING:
            raise decodermod.CBORDecodeError(
                'expected major type %d; got %d' % (MAJOR_TYPE_BYTESTRING,
                                                    majortype))

        if subtype != SUBTYPE_INDEFINITE:
            raise decodermod.CBORDecodeError(
                'expected indefinite subtype; got %d' % subtype)

    # The indefinite bytestring is composed of chunks of normal bytestrings.
    # Read chunks until we hit a BREAK byte.

    while True:
        # We need to sniff for the BREAK byte.
        initial = byteasinteger(read(1))

        if initial == BREAK_INT:
            break

        # Every chunk must itself be a bytestring; anything else means the
        # stream is malformed and its length field cannot be trusted.
        majortype = initial >> 5

        if majortype != MAJOR_TYPE_BYTESTRING:
            raise decodermod.CBORDecodeError(
                'expected major type %d; got %d' % (MAJOR_TYPE_BYTESTRING,
                                                    majortype))

        length = decodeuint(fh, initial & SUBTYPE_MASK)
        chunk = read(length)

        if len(chunk) != length:
            raise decodermod.CBORDecodeError(
                'failed to read bytestring chunk: got %d bytes; expected %d' % (
                    len(chunk), length))

        yield chunk
General Comments 0
You need to be logged in to leave comments. Login now