##// END OF EJS Templates
cborutil: cast bytearray to bytes...
Gregory Szorc -
r40160:b638219a default
parent child Browse files
Show More
@@ -1,990 +1,995 b''
1 # cborutil.py - CBOR extensions
1 # cborutil.py - CBOR extensions
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import sys
11 import sys
12
12
13 from .. import pycompat
13 from .. import pycompat
14
14
15 # Very short version of RFC 7049...
15 # Very short version of RFC 7049...
16 #
16 #
17 # Each item begins with a byte. The 3 high bits of that byte denote the
17 # Each item begins with a byte. The 3 high bits of that byte denote the
18 # "major type." The lower 5 bits denote the "subtype." Each major type
18 # "major type." The lower 5 bits denote the "subtype." Each major type
19 # has its own encoding mechanism.
19 # has its own encoding mechanism.
20 #
20 #
21 # Most types have lengths. However, bytestring, string, array, and map
21 # Most types have lengths. However, bytestring, string, array, and map
22 # can be indefinite length. These are denoted by a subtype with value 31.
22 # can be indefinite length. These are denoted by a subtype with value 31.
23 # Sub-components of those types then come afterwards and are terminated
23 # Sub-components of those types then come afterwards and are terminated
24 # by a "break" byte.
24 # by a "break" byte.
25
25
# Major types. These occupy the 3 high bits of an item's initial byte.
MAJOR_TYPE_UINT = 0
MAJOR_TYPE_NEGINT = 1
MAJOR_TYPE_BYTESTRING = 2
MAJOR_TYPE_STRING = 3
MAJOR_TYPE_ARRAY = 4
MAJOR_TYPE_MAP = 5
MAJOR_TYPE_SEMANTIC = 6
MAJOR_TYPE_SPECIAL = 7

# Mask selecting the lower 5 bits (the "subtype") of the initial byte.
SUBTYPE_MASK = 0b00011111

# Subtype values.
SUBTYPE_FALSE = 20
SUBTYPE_TRUE = 21
SUBTYPE_NULL = 22
SUBTYPE_HALF_FLOAT = 25
SUBTYPE_SINGLE_FLOAT = 26
SUBTYPE_DOUBLE_FLOAT = 27
SUBTYPE_INDEFINITE = 31

SEMANTIC_TAG_FINITE_SET = 258

# Indefinite types begin with their major type ORd with information value 31.
BEGIN_INDEFINITE_BYTESTRING = struct.pack(
    r'>B', (MAJOR_TYPE_BYTESTRING << 5) | SUBTYPE_INDEFINITE)
BEGIN_INDEFINITE_ARRAY = struct.pack(
    r'>B', (MAJOR_TYPE_ARRAY << 5) | SUBTYPE_INDEFINITE)
BEGIN_INDEFINITE_MAP = struct.pack(
    r'>B', (MAJOR_TYPE_MAP << 5) | SUBTYPE_INDEFINITE)

# Packers for an item header: the initial byte alone, or the initial byte
# followed by a big-endian 1, 2, 4 or 8 byte unsigned value.
ENCODED_LENGTH_1 = struct.Struct(r'>B')
ENCODED_LENGTH_2 = struct.Struct(r'>BB')
ENCODED_LENGTH_3 = struct.Struct(r'>BH')
ENCODED_LENGTH_4 = struct.Struct(r'>BL')
ENCODED_LENGTH_5 = struct.Struct(r'>BQ')

# The break ends an indefinite length item.
BREAK = b'\xff'
BREAK_INT = 255
64
64
def encodelength(majortype, length):
    """Pack the item header for ``majortype`` carrying ``length``.

    Values below 24 are stored inline in the low 5 bits of the initial
    byte. Larger values are stored in the 1, 2, 4 or 8 bytes following
    the initial byte, with subtype 24, 25, 26 or 27 respectively.
    """
    high = majortype << 5

    if length < 24:
        return ENCODED_LENGTH_1.pack(high | length)
    if length < 256:
        return ENCODED_LENGTH_2.pack(high | 24, length)
    if length < 65536:
        return ENCODED_LENGTH_3.pack(high | 25, length)
    if length < 4294967296:
        return ENCODED_LENGTH_4.pack(high | 26, length)

    return ENCODED_LENGTH_5.pack(high | 27, length)
77
77
def streamencodebytestring(v):
    """Emit ``v`` as a definite length bytestring: header, then payload."""
    header = encodelength(MAJOR_TYPE_BYTESTRING, len(v))
    yield header
    yield v
81
81
def streamencodebytestringfromiter(it):
    """Encode an iterable of chunks as an indefinite length bytestring.

    Every element produced by ``it`` is emitted as its own definite
    length bytestring (header followed by the element itself). The
    sequence is framed by the indefinite bytestring marker and a break.
    """
    yield BEGIN_INDEFINITE_BYTESTRING

    for piece in it:
        size = len(piece)
        yield encodelength(MAJOR_TYPE_BYTESTRING, size)
        yield piece

    yield BREAK
95
95
def streamencodeindefinitebytestring(source, chunksize=65536):
    """Given a large source buffer, emit as an indefinite length bytestring.

    This is a generator of chunks constituting the encoded CBOR data.

    ``source`` is sliced into pieces of at most ``chunksize`` bytes and
    each piece is emitted as a definite length bytestring. An empty
    ``source`` still produces a single zero-length chunk.

    Raises ``ValueError`` (when the generator is first advanced) if
    ``chunksize`` is not positive. Such a value would never advance to
    the end of a non-empty source and would emit chunks forever.
    """
    if chunksize < 1:
        raise ValueError('chunksize must be a positive integer')

    yield BEGIN_INDEFINITE_BYTESTRING

    pos = 0
    total = len(source)

    # The body runs at least once so an empty source emits one empty chunk.
    while True:
        chunk = source[pos:pos + chunksize]
        pos += len(chunk)

        yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
        yield chunk

        if pos >= total:
            break

    yield BREAK
117
117
def streamencodeint(v):
    """Emit the encoding of integer ``v``.

    Non-negative values use the uint major type. Negative values use
    the negint major type and are stored as ``abs(v) - 1``.

    Raises ``ValueError`` if ``v`` is outside ``[-2**64, 2**64)``.
    """
    if v < -18446744073709551616 or v >= 18446744073709551616:
        raise ValueError('big integers not supported')

    if v < 0:
        yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
    else:
        yield encodelength(MAJOR_TYPE_UINT, v)
126
126
def streamencodearray(l):
    """Encode a sized iterable as a definite length array.

    The array header carrying ``len(l)`` is emitted first, followed by
    the encoding of each element in iteration order.
    """
    yield encodelength(MAJOR_TYPE_ARRAY, len(l))

    for item in l:
        for piece in streamencode(item):
            yield piece
135
135
def streamencodearrayfromiter(it):
    """Encode the items of an iterator as an indefinite length array.

    The output is the indefinite array marker, the encoding of every
    item, and a terminating break.
    """
    yield BEGIN_INDEFINITE_ARRAY

    for item in it:
        for piece in streamencode(item):
            yield piece

    yield BREAK
146
146
147 def _mixedtypesortkey(v):
147 def _mixedtypesortkey(v):
148 return type(v).__name__, v
148 return type(v).__name__, v
149
149
def streamencodeset(s):
    """Encode a set as semantic tag 258 followed by a sorted array.

    https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
    semantic tag 258 for finite sets. Members are sorted with
    ``_mixedtypesortkey`` so that the output is deterministic.
    """
    yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)

    members = sorted(s, key=_mixedtypesortkey)
    for piece in streamencodearray(members):
        yield piece
157
157
def streamencodemap(d):
    """Encode dictionary to a generator.

    Does not support indefinite length dictionaries.

    The map header carrying ``len(d)`` is emitted first. Entries follow,
    sorted by key using ``_mixedtypesortkey``, with each key's encoding
    immediately followed by its value's encoding.
    """
    yield encodelength(MAJOR_TYPE_MAP, len(d))

    # items() exists on both Python 2 and 3 dicts; iteritems() is
    # Python 2 only.
    for key, value in sorted(d.items(),
                             key=lambda x: _mixedtypesortkey(x[0])):
        for chunk in streamencode(key):
            yield chunk
        for chunk in streamencode(value):
            yield chunk
171
171
def streamencodemapfromiter(it):
    """Encode an iterable of (key, value) pairs as an indefinite length map.

    Pairs are emitted in iteration order, framed by the indefinite map
    marker and a break.
    """
    yield BEGIN_INDEFINITE_MAP

    for k, v in it:
        for piece in streamencode(k):
            yield piece
        for piece in streamencode(v):
            yield piece

    yield BREAK
183
183
def streamencodebool(b):
    """Emit the single byte encoding a boolean.

    Booleans are major type 7 with simple value 20 (false) or 21 (true).
    """
    if b:
        yield b'\xf5'
    else:
        yield b'\xf4'
187
187
def streamencodenone(v):
    """Emit the single byte encoding null (major type 7, simple value 22).

    The argument is ignored.
    """
    yield b'\xf6'
191
191
# Maps a Python type to the generator function that encodes its instances.
STREAM_ENCODERS = {
    # Scalars.
    bytes: streamencodebytestring,
    int: streamencodeint,
    pycompat.long: streamencodeint,
    # Collections.
    list: streamencodearray,
    tuple: streamencodearray,
    dict: streamencodemap,
    set: streamencodeset,
    # Simple values.
    bool: streamencodebool,
    type(None): streamencodenone,
}
203
203
def streamencode(v):
    """Encode a value to CBOR in a streaming manner.

    The encoder is selected by looking up the exact class of ``v`` in
    ``STREAM_ENCODERS``, and nested values are encoded recursively.

    Returns a generator of CBOR encoded bytes. There is no guarantee
    that each emitted chunk fully decodes to a value or sub-value.

    Encoding is deterministic - unordered collections are sorted.

    Raises ``ValueError`` if no encoder is registered for the value's class.
    """
    encoder = STREAM_ENCODERS.get(v.__class__)

    if encoder:
        return encoder(v)

    raise ValueError('do not know how to encode %s' % type(v))
220
220
class CBORDecodeError(Exception):
    """Raised when CBOR input cannot be decoded."""
223
223
224 if sys.version_info.major >= 3:
224 if sys.version_info.major >= 3:
225 def _elementtointeger(b, i):
225 def _elementtointeger(b, i):
226 return b[i]
226 return b[i]
227 else:
227 else:
228 def _elementtointeger(b, i):
228 def _elementtointeger(b, i):
229 return ord(b[i])
229 return ord(b[i])
230
230
# Big-endian unsigned integer unpackers of 1, 2, 4 and 8 bytes.
STRUCT_BIG_UBYTE = struct.Struct(r'>B')
STRUCT_BIG_USHORT = struct.Struct('>H')
STRUCT_BIG_ULONG = struct.Struct('>L')
STRUCT_BIG_ULONGLONG = struct.Struct('>Q')

# State flags reported as the last element of the tuple returned by
# ``decodeitem()``.
SPECIAL_NONE = 0
SPECIAL_START_INDEFINITE_BYTESTRING = 1
SPECIAL_START_ARRAY = 2
SPECIAL_START_MAP = 3
SPECIAL_START_SET = 4
SPECIAL_INDEFINITE_BREAK = 5
242
242
def decodeitem(b, offset=0):
    """Decode up to one CBOR item from buffer ``b`` starting at ``offset``.

    The start of a collection (array, map, set, or indefinite length
    bytestring) counts as a single item. In those cases the returned
    state flag tells the caller that a special value was seen.

    The function either returns a decoded value or a hint of how many
    more bytes are required. A caller can rebuild the original values by
    calling it repeatedly over a stream of bytes.

    Returns a 4-tuple of:

    * Bool indicating whether a complete value was decoded.
    * The decoded value if the first element is True, otherwise None.
      For the start of an array, map or set this is the element count.
    * Integer number of bytes. If positive, the number of bytes read.
      If negative, the number of bytes still needed to decode this
      value or the next chunk in this value.
    * One of the ``SPECIAL_*`` constants indicating special treatment
      for this value. ``SPECIAL_NONE`` means this is a fully decoded
      simple value (such as an integer or bool).

    Raises ``CBORDecodeError`` for the string major type, for any
    semantic tag other than the finite set tag, for a finite set tag not
    followed by an array, and for unsupported special subtypes.
    """
    header = _elementtointeger(b, offset)
    offset += 1

    major = header >> 5
    info = header & SUBTYPE_MASK

    if major == MAJOR_TYPE_UINT:
        done, number, consumed = decodeuint(info, b, offset)

        if not done:
            return False, None, consumed, SPECIAL_NONE

        # + 1 accounts for the initial byte.
        return True, number, consumed + 1, SPECIAL_NONE

    if major == MAJOR_TYPE_NEGINT:
        # Negative integers are the same as UINT except inverted minus 1.
        done, number, consumed = decodeuint(info, b, offset)

        if not done:
            return False, None, consumed, SPECIAL_NONE

        return True, -number - 1, consumed + 1, SPECIAL_NONE

    if major == MAJOR_TYPE_BYTESTRING:
        # The length is decoded like a uint, except that it may be the
        # indefinite length marker.
        done, length, consumed = decodeuint(info, b, offset,
                                            allowindefinite=True)

        # The length itself is truncated. This can only happen for a
        # definite length, since the indefinite marker lives in the
        # initial byte.
        if not done:
            return False, None, consumed, SPECIAL_NONE

        # Indefinite length bytestring: only the initial byte is consumed.
        if length is None:
            return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING

        start = offset + consumed
        end = start + length

        # The payload is not fully buffered yet; report the shortfall as
        # a negative count.
        if end > len(b):
            return False, None, len(b) - end, SPECIAL_NONE

        return True, b[start:end], consumed + length + 1, SPECIAL_NONE

    if major == MAJOR_TYPE_STRING:
        raise CBORDecodeError('string major type not supported')

    if major == MAJOR_TYPE_ARRAY:
        # The element count is decoded like a uint. Indefinite length
        # arrays are not allowed.
        done, count, consumed = decodeuint(info, b, offset)

        if not done:
            return False, None, consumed, SPECIAL_NONE

        return True, count, consumed + 1, SPECIAL_START_ARRAY

    if major == MAJOR_TYPE_MAP:
        # The element count is decoded like a uint. Indefinite length
        # maps are not allowed.
        done, count, consumed = decodeuint(info, b, offset)

        if not done:
            return False, None, consumed, SPECIAL_NONE

        return True, count, consumed + 1, SPECIAL_START_MAP

    if major == MAJOR_TYPE_SEMANTIC:
        # Semantic tag value is read the same as a uint.
        done, tag, consumed = decodeuint(info, b, offset)

        if not done:
            return False, None, consumed, SPECIAL_NONE

        # The tagged item follows the tag. A more robust parser would
        # probably report the tag to the caller and let it handle what
        # follows. Since very few tags are supported, the special cases
        # are handled here to hide the complexity from the caller.
        if tag != SEMANTIC_TAG_FINITE_SET:
            raise CBORDecodeError('semantic tag %d not allowed' % tag)

        inner = offset + consumed

        # At least the initial byte of the tagged array is required.
        if inner >= len(b):
            return False, None, -1, SPECIAL_NONE

        done, count, innerconsumed, special = decodeitem(b, inner)

        if not done:
            return False, None, innerconsumed, SPECIAL_NONE

        if special != SPECIAL_START_ARRAY:
            raise CBORDecodeError('expected array after finite set '
                                  'semantic tag')

        return True, count, consumed + innerconsumed + 1, SPECIAL_START_SET

    if major == MAJOR_TYPE_SPECIAL:
        # Only specific values for the information field are allowed.
        if info == SUBTYPE_FALSE:
            return True, False, 1, SPECIAL_NONE
        if info == SUBTYPE_TRUE:
            return True, True, 1, SPECIAL_NONE
        if info == SUBTYPE_NULL:
            return True, None, 1, SPECIAL_NONE
        if info == SUBTYPE_INDEFINITE:
            return True, None, 1, SPECIAL_INDEFINITE_BREAK

        # If value is 24, subtype is in next byte.
        raise CBORDecodeError('special type %d not allowed' % info)

    # A 3-bit major type is always one of the values handled above.
    assert False
392
392
def decodeuint(subtype, b, offset=0, allowindefinite=False):
    """Decode an unsigned integer.

    ``subtype`` is the lower 5 bits of the initial byte of a CBOR item
    "header." ``b`` is a buffer containing bytes. ``offset`` is the
    index of the first byte after the byte that ``subtype`` was derived
    from.

    ``allowindefinite`` allows the special indefinite length value
    indicator (subtype 31).

    Returns a 3-tuple of (successful, value, count).

    ``successful`` is a bool indicating whether decoding completed.
    ``value`` is the decoded integer. It is None if decoding did not
    complete, or if the subtype is 31 and ``allowindefinite`` is True.
    ``count`` is a number of bytes. If positive, it is the number of
    additional bytes decoded. If negative, it is the number of
    additional bytes needed to decode this value.

    Raises ``CBORDecodeError`` for subtype 31 when ``allowindefinite``
    is False, and for subtypes 28 through 30.
    """
    # Small values are inline.
    if subtype < 24:
        return True, subtype, 0

    # Indefinite length specifier.
    if subtype == 31:
        if not allowindefinite:
            raise CBORDecodeError('indefinite length uint not allowed here')

        return True, None, 0

    if subtype >= 28:
        raise CBORDecodeError('unsupported subtype on integer type: %d' %
                              subtype)

    # Subtypes 24 through 27 select the width of the value that follows.
    unpacker = {
        24: STRUCT_BIG_UBYTE,
        25: STRUCT_BIG_USHORT,
        26: STRUCT_BIG_ULONG,
        27: STRUCT_BIG_ULONGLONG,
    }.get(subtype)

    if unpacker is None:
        raise CBORDecodeError('bounds condition checking violation')

    available = len(b) - offset

    # Not enough buffered data: report the shortfall as a negative count.
    if available < unpacker.size:
        return False, None, available - unpacker.size

    return True, unpacker.unpack_from(b, offset)[0], unpacker.size
441
441
class bytestringchunk(bytes):
    """A chunk/segment of an indefinite length bytestring.

    Instances behave like ``bytes`` and additionally carry the
    ``isfirst`` and ``islast`` attributes, which indicate whether the
    chunk is the first or last one of its indefinite length bytestring.
    """

    def __new__(cls, v, first=False, last=False):
        chunk = bytes.__new__(cls, v)
        chunk.isfirst, chunk.islast = first, last

        return chunk
456
456
457 class sansiodecoder(object):
457 class sansiodecoder(object):
458 """A CBOR decoder that doesn't perform its own I/O.
458 """A CBOR decoder that doesn't perform its own I/O.
459
459
460 To use, construct an instance and feed it segments containing
460 To use, construct an instance and feed it segments containing
461 CBOR-encoded bytes via ``decode()``. The return value from ``decode()``
461 CBOR-encoded bytes via ``decode()``. The return value from ``decode()``
462 indicates whether a fully-decoded value is available, how many bytes
462 indicates whether a fully-decoded value is available, how many bytes
463 were consumed, and offers a hint as to how many bytes should be fed
463 were consumed, and offers a hint as to how many bytes should be fed
464 in next time to decode the next value.
464 in next time to decode the next value.
465
465
466 The decoder assumes it will decode N discrete CBOR values, not just
466 The decoder assumes it will decode N discrete CBOR values, not just
467 a single value. i.e. if the bytestream contains uints packed one after
467 a single value. i.e. if the bytestream contains uints packed one after
468 the other, the decoder will decode them all, rather than just the initial
468 the other, the decoder will decode them all, rather than just the initial
469 one.
469 one.
470
470
471 When ``decode()`` indicates a value is available, call ``getavailable()``
471 When ``decode()`` indicates a value is available, call ``getavailable()``
472 to return all fully decoded values.
472 to return all fully decoded values.
473
473
474 ``decode()`` can partially decode input. It is up to the caller to keep
474 ``decode()`` can partially decode input. It is up to the caller to keep
475 track of what data was consumed and to pass unconsumed data in on the
475 track of what data was consumed and to pass unconsumed data in on the
476 next invocation.
476 next invocation.
477
477
478 The decoder decodes atomically at the *item* level. See ``decodeitem()``.
478 The decoder decodes atomically at the *item* level. See ``decodeitem()``.
479 If an *item* cannot be fully decoded, the decoder won't record it as
479 If an *item* cannot be fully decoded, the decoder won't record it as
480 partially consumed. Instead, the caller will be instructed to pass in
480 partially consumed. Instead, the caller will be instructed to pass in
481 the initial bytes of this item on the next invocation. This does result
481 the initial bytes of this item on the next invocation. This does result
482 in some redundant parsing. But the overhead should be minimal.
482 in some redundant parsing. But the overhead should be minimal.
483
483
484 This decoder only supports a subset of CBOR as required by Mercurial.
484 This decoder only supports a subset of CBOR as required by Mercurial.
485 It lacks support for:
485 It lacks support for:
486
486
487 * Indefinite length arrays
487 * Indefinite length arrays
488 * Indefinite length maps
488 * Indefinite length maps
489 * Use of indefinite length bytestrings as keys or values within
489 * Use of indefinite length bytestrings as keys or values within
490 arrays, maps, or sets.
490 arrays, maps, or sets.
491 * Nested arrays, maps, or sets within sets
491 * Nested arrays, maps, or sets within sets
492 * Any semantic tag that isn't a mathematical finite set
492 * Any semantic tag that isn't a mathematical finite set
493 * Floating point numbers
493 * Floating point numbers
494 * Undefined special value
494 * Undefined special value
495
495
496 CBOR types are decoded to Python types as follows:
496 CBOR types are decoded to Python types as follows:
497
497
498 uint -> int
498 uint -> int
499 negint -> int
499 negint -> int
500 bytestring -> bytes
500 bytestring -> bytes
501 map -> dict
501 map -> dict
502 array -> list
502 array -> list
503 True -> bool
503 True -> bool
504 False -> bool
504 False -> bool
505 null -> None
505 null -> None
506 indefinite length bytestring chunk -> [bytestringchunk]
506 indefinite length bytestring chunk -> [bytestringchunk]
507
507
508 The only non-obvious mapping here is an indefinite length bytestring
508 The only non-obvious mapping here is an indefinite length bytestring
509 to the ``bytestringchunk`` type. This is to facilitate streaming
509 to the ``bytestringchunk`` type. This is to facilitate streaming
510 indefinite length bytestrings out of the decoder and to differentiate
510 indefinite length bytestrings out of the decoder and to differentiate
511 a regular bytestring from an indefinite length bytestring.
511 a regular bytestring from an indefinite length bytestring.
512 """
512 """
513
513
514 _STATE_NONE = 0
514 _STATE_NONE = 0
515 _STATE_WANT_MAP_KEY = 1
515 _STATE_WANT_MAP_KEY = 1
516 _STATE_WANT_MAP_VALUE = 2
516 _STATE_WANT_MAP_VALUE = 2
517 _STATE_WANT_ARRAY_VALUE = 3
517 _STATE_WANT_ARRAY_VALUE = 3
518 _STATE_WANT_SET_VALUE = 4
518 _STATE_WANT_SET_VALUE = 4
519 _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
519 _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
520 _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6
520 _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6
521
521
def __init__(self):
    """Create a decoder that is idle and holds no decoded values."""
    # TODO add support for limiting size of bytestrings
    # TODO add support for limiting number of keys / values in collections
    # TODO add support for limiting size of buffered partial values

    # Values that have been completely decoded and not yet handed out.
    self._decodedvalues = []

    # Key already decoded for the map being filled; its value comes next.
    self._currentmapkey = None

    # Collections currently being populated, innermost last. Every entry
    # is a dict describing one collection.
    self._collectionstack = []

    # Nothing is partially decoded yet.
    self._state = self._STATE_NONE

    # Running total of bytes consumed by successfully decoded items.
    self.decodedbytecount = 0
540
@property
def inprogress(self):
    """True when a value has been started but is not yet complete."""
    idle = self._state == self._STATE_NONE
    return not idle
545
545
def decode(self, b, offset=0):
    """Attempt to decode bytes from an input buffer.

    ``b`` is a collection of bytes and ``offset`` is the byte
    offset within that buffer from which to begin reading data.

    ``b`` must support ``len()`` and accessing bytes slices (i.e.
    ``__getitem__`` with a slice). Typically ``bytes`` instances are used.

    Items are pulled from ``b`` one at a time with ``decodeitem()`` and
    routed according to the decoder's current state. Completed top-level
    values are queued on ``self._decodedvalues``; collections are built
    incrementally on ``self._collectionstack`` and queued once their last
    member has been seen.

    Returns a tuple with the following fields:

    * Bool indicating whether values are available for retrieval.
    * Integer indicating the number of bytes that were fully consumed,
      starting from ``offset``.
    * Integer indicating the number of bytes that are desired for the
      next call in order to decode an item. This is ``0`` when the loop
      ran to the end of ``b`` without hitting a partial item.

    Raises ``CBORDecodeError`` when an item is encountered that is not
    permitted in the current state (e.g. an indefinite length bytestring
    or a collection used as a map key or set member).
    """
    # Empty input: nothing to consume, just report whether earlier calls
    # left values waiting.
    if not b:
        return bool(self._decodedvalues), 0, 0

    # Remembered so the consumed byte count can be reported relative to
    # where this call started.
    initialoffset = offset

    # We could easily split the body of this loop into a function. But
    # Python performance is sensitive to function calls and collections
    # are composed of many items. So leaving as a while loop could help
    # with performance. One thing that may not help is the use of
    # if..elif versus a lookup/dispatch table. There may be value
    # in switching that.
    while offset < len(b):
        # Attempt to decode an item. This could be a whole value or a
        # special value indicating an event, such as start or end of a
        # collection or indefinite length type.
        complete, value, readcount, special = decodeitem(b, offset)

        # Only count bytes for items that were actually consumed; an
        # incomplete item reports a negative readcount (see below).
        if readcount > 0:
            self.decodedbytecount += readcount

        if not complete:
            # For an incomplete item ``readcount`` is negative and its
            # magnitude is passed back as the "bytes wanted" hint.
            # ``offset`` is not advanced, so the partial item is reported
            # as unconsumed.
            assert readcount < 0
            return (
                bool(self._decodedvalues),
                offset - initialoffset,
                -readcount,
            )

        offset += readcount

        # No nested state. We either have a full value or beginning of a
        # complex value to deal with.
        if self._state == self._STATE_NONE:
            # A normal value.
            if special == SPECIAL_NONE:
                self._decodedvalues.append(value)

            # For the collection starts below, ``value`` is stored as the
            # count of members still expected ('remaining').
            elif special == SPECIAL_START_ARRAY:
                self._collectionstack.append({
                    'remaining': value,
                    'v': [],
                })
                self._state = self._STATE_WANT_ARRAY_VALUE

            elif special == SPECIAL_START_MAP:
                self._collectionstack.append({
                    'remaining': value,
                    'v': {},
                })
                self._state = self._STATE_WANT_MAP_KEY

            elif special == SPECIAL_START_SET:
                self._collectionstack.append({
                    'remaining': value,
                    'v': set(),
                })
                self._state = self._STATE_WANT_SET_VALUE

            elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST

            else:
                raise CBORDecodeError('unhandled special state: %d' %
                                      special)

        # This value becomes an element of the current array.
        elif self._state == self._STATE_WANT_ARRAY_VALUE:
            # Simple values get appended.
            if special == SPECIAL_NONE:
                c = self._collectionstack[-1]
                c['v'].append(value)
                c['remaining'] -= 1

                # self._state doesn't need changed.

            # An array nested within an array.
            elif special == SPECIAL_START_ARRAY:
                lastc = self._collectionstack[-1]
                newvalue = []

                # The (still empty) child is attached to its parent right
                # away; it is filled in place as its members arrive.
                lastc['v'].append(newvalue)
                lastc['remaining'] -= 1

                self._collectionstack.append({
                    'remaining': value,
                    'v': newvalue,
                })

                # self._state doesn't need changed.

            # A map nested within an array.
            elif special == SPECIAL_START_MAP:
                lastc = self._collectionstack[-1]
                newvalue = {}

                lastc['v'].append(newvalue)
                lastc['remaining'] -= 1

                self._collectionstack.append({
                    'remaining': value,
                    'v': newvalue
                })

                self._state = self._STATE_WANT_MAP_KEY

            # A set nested within an array.
            elif special == SPECIAL_START_SET:
                lastc = self._collectionstack[-1]
                newvalue = set()

                lastc['v'].append(newvalue)
                lastc['remaining'] -= 1

                self._collectionstack.append({
                    'remaining': value,
                    'v': newvalue,
                })

                self._state = self._STATE_WANT_SET_VALUE

            elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                raise CBORDecodeError('indefinite length bytestrings '
                                      'not allowed as array values')

            else:
                raise CBORDecodeError('unhandled special item when '
                                      'expecting array value: %d' % special)

        # This value becomes the key of the current map instance.
        elif self._state == self._STATE_WANT_MAP_KEY:
            if special == SPECIAL_NONE:
                # Hold on to the key until its value has been decoded.
                self._currentmapkey = value
                self._state = self._STATE_WANT_MAP_VALUE

            elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                raise CBORDecodeError('indefinite length bytestrings '
                                      'not allowed as map keys')

            elif special in (SPECIAL_START_ARRAY, SPECIAL_START_MAP,
                             SPECIAL_START_SET):
                raise CBORDecodeError('collections not supported as map '
                                      'keys')

            # We do not allow special values to be used as map keys.
            else:
                raise CBORDecodeError('unhandled special item when '
                                      'expecting map key: %d' % special)

        # This value becomes the value of the current map key.
        elif self._state == self._STATE_WANT_MAP_VALUE:
            # Simple values simply get inserted into the map.
            if special == SPECIAL_NONE:
                lastc = self._collectionstack[-1]
                lastc['v'][self._currentmapkey] = value
                lastc['remaining'] -= 1

                self._state = self._STATE_WANT_MAP_KEY

            # A new array is used as the map value.
            elif special == SPECIAL_START_ARRAY:
                lastc = self._collectionstack[-1]
                newvalue = []

                lastc['v'][self._currentmapkey] = newvalue
                lastc['remaining'] -= 1

                self._collectionstack.append({
                    'remaining': value,
                    'v': newvalue,
                })

                self._state = self._STATE_WANT_ARRAY_VALUE

            # A new map is used as the map value.
            elif special == SPECIAL_START_MAP:
                lastc = self._collectionstack[-1]
                newvalue = {}

                lastc['v'][self._currentmapkey] = newvalue
                lastc['remaining'] -= 1

                self._collectionstack.append({
                    'remaining': value,
                    'v': newvalue,
                })

                self._state = self._STATE_WANT_MAP_KEY

            # A new set is used as the map value.
            elif special == SPECIAL_START_SET:
                lastc = self._collectionstack[-1]
                newvalue = set()

                lastc['v'][self._currentmapkey] = newvalue
                lastc['remaining'] -= 1

                self._collectionstack.append({
                    'remaining': value,
                    'v': newvalue,
                })

                self._state = self._STATE_WANT_SET_VALUE

            elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                raise CBORDecodeError('indefinite length bytestrings not '
                                      'allowed as map values')

            else:
                raise CBORDecodeError('unhandled special item when '
                                      'expecting map value: %d' % special)

            # The key has been paired with its value (or with the new
            # nested collection), so drop the reference.
            self._currentmapkey = None

        # This value is added to the current set.
        elif self._state == self._STATE_WANT_SET_VALUE:
            if special == SPECIAL_NONE:
                lastc = self._collectionstack[-1]
                lastc['v'].add(value)
                lastc['remaining'] -= 1

            elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                raise CBORDecodeError('indefinite length bytestrings not '
                                      'allowed as set values')

            elif special in (SPECIAL_START_ARRAY,
                             SPECIAL_START_MAP,
                             SPECIAL_START_SET):
                raise CBORDecodeError('collections not allowed as set '
                                      'values')

            # We don't allow non-trivial types to exist as set values.
            else:
                raise CBORDecodeError('unhandled special item when '
                                      'expecting set value: %d' % special)

        # This value represents the first chunk in an indefinite length
        # bytestring.
        elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
            # We received a full chunk.
            if special == SPECIAL_NONE:
                self._decodedvalues.append(bytestringchunk(value,
                                                           first=True))

                self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT

            # The end of stream marker. This means it is an empty
            # indefinite length bytestring.
            elif special == SPECIAL_INDEFINITE_BREAK:
                # We /could/ convert this to a b''. But we want to preserve
                # the nature of the underlying data so consumers expecting
                # an indefinite length bytestring get one.
                self._decodedvalues.append(bytestringchunk(b'',
                                                           first=True,
                                                           last=True))

                # Since indefinite length bytestrings can't be used in
                # collections, we must be at the root level.
                assert not self._collectionstack
                self._state = self._STATE_NONE

            else:
                raise CBORDecodeError('unexpected special value when '
                                      'expecting bytestring chunk: %d' %
                                      special)

        # This value represents the non-initial chunk in an indefinite
        # length bytestring.
        elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
            # We received a full chunk.
            if special == SPECIAL_NONE:
                self._decodedvalues.append(bytestringchunk(value))

            # The end of stream marker.
            elif special == SPECIAL_INDEFINITE_BREAK:
                # The terminator is surfaced as an empty chunk flagged
                # ``last=True``.
                self._decodedvalues.append(bytestringchunk(b'', last=True))

                # Since indefinite length bytestrings can't be used in
                # collections, we must be at the root level.
                assert not self._collectionstack
                self._state = self._STATE_NONE

            else:
                raise CBORDecodeError('unexpected special value when '
                                      'expecting bytestring chunk: %d' %
                                      special)

        else:
            raise CBORDecodeError('unhandled decoder state: %d' %
                                  self._state)

        # We could have just added the final value in a collection. End
        # all complete collections at the top of the stack.
        while True:
            # Bail if we're not waiting on a new collection item.
            if self._state not in (self._STATE_WANT_ARRAY_VALUE,
                                   self._STATE_WANT_MAP_KEY,
                                   self._STATE_WANT_SET_VALUE):
                break

            # Or we are expecting more items for this collection.
            lastc = self._collectionstack[-1]

            if lastc['remaining']:
                break

            # The collection at the top of the stack is complete.

            # Discard it, as it isn't needed for future items.
            self._collectionstack.pop()

            # If this is a nested collection, we don't emit it, since it
            # will be emitted by its parent collection. But we do need to
            # update state to reflect what the new top-most collection
            # on the stack is.
            if self._collectionstack:
                # The Python type of the parent's container tells us what
                # kind of item the parent expects next.
                self._state = {
                    list: self._STATE_WANT_ARRAY_VALUE,
                    dict: self._STATE_WANT_MAP_KEY,
                    set: self._STATE_WANT_SET_VALUE,
                }[type(self._collectionstack[-1]['v'])]

            # If this is the root collection, emit it.
            else:
                self._decodedvalues.append(lastc['v'])
                self._state = self._STATE_NONE

    # The whole buffer from ``offset`` onwards was consumed; no specific
    # number of additional bytes is being requested.
    return (
        bool(self._decodedvalues),
        offset - initialoffset,
        0,
    )
893
def getavailable(self):
    """Return the list of values decoded so far and forget them.

    Each decoded value is handed out exactly once: a subsequent call only
    yields values decoded after this one.
    """
    # Swap in a fresh list and hand the accumulated one to the caller.
    ready, self._decodedvalues = self._decodedvalues, []
    return ready
903
903
class bufferingdecoder(object):
    """Streaming CBOR decoder that retains input it could not yet decode.

    Wraps a ``sansiodecoder`` and holds on to whatever bytes that decoder
    left unconsumed. Retained bytes are joined with later input before
    decoding is attempted again, so callers may feed data in arbitrarily
    sized pieces.

    TODO consider adding limits as to the maximum amount of data that can
    be buffered.
    """
    def __init__(self):
        self._decoder = sansiodecoder()
        # Pieces of input received but not yet consumed by the decoder.
        self._chunks = []
        # Bytes still needed, per the decoder's last hint.
        self._wanted = 0

    def decode(self, b):
        """Feed ``b`` to the decoder, together with any retained input.

        Returns a 3-tuple:

        * Bool indicating whether new values are available for retrieval.
        * Integer number of bytes decoded from the new input.
        * Integer number of bytes wanted to decode the next value.
        """
        # We /might/ be able to support passing a bytearray all the
        # way through. For now, let's cheat.
        if isinstance(b, bytearray):
            b = bytes(b)

        # Pending input is kept as a list of pieces and only joined once
        # enough data has arrived to be worth another decode attempt. That
        # avoids repeatedly reallocating one ever-growing buffer, which
        # matters most when many incoming pieces do not complete an item.
        carried = 0
        pending = self._chunks

        if pending:
            incoming = len(b)
            pending.append(b)

            # Still short of what the decoder asked for last time: keep
            # buffering and do not attempt to decode.
            if incoming < self._wanted:
                self._wanted -= incoming
                return False, 0, self._wanted

            # Possibly enough data now. Merge everything into a single
            # buffer and start over with an empty piece list.
            b = b''.join(pending)
            self._chunks = []
            carried = len(b) - incoming

        available, consumed, wanted = self._decoder.decode(b)
        self._wanted = wanted

        # Retain the unconsumed tail for the next call.
        if consumed < len(b):
            self._chunks.append(b[consumed:])

        # Report consumption relative to the new input only.
        return available, consumed - carried, wanted

    def getavailable(self):
        """Return the decoded values held by the wrapped decoder."""
        decoded = self._decoder.getavailable()
        return decoded
967
972
def decodeall(b):
    """Decode every CBOR value contained in ``b`` and return them as a list.

    ``b`` must hold nothing but complete values. Besides the usual decode
    errors, ``CBORDecodeError`` is raised when the buffer is not consumed
    in full or when decoding stops part-way through a value (for example
    an unfinished collection or indefinite length item).
    """
    if not b:
        return []

    decoder = sansiodecoder()

    # Only the consumed byte count is of interest here.
    consumed = decoder.decode(b)[1]

    if consumed != len(b):
        raise CBORDecodeError('input data not fully consumed')

    if decoder.inprogress:
        raise CBORDecodeError('input data not complete')

    return decoder.getavailable()
@@ -1,984 +1,992 b''
1 from __future__ import absolute_import
1 from __future__ import absolute_import
2
2
3 import unittest
3 import unittest
4
4
5 from mercurial.thirdparty import (
5 from mercurial.thirdparty import (
6 cbor,
6 cbor,
7 )
7 )
8 from mercurial.utils import (
8 from mercurial.utils import (
9 cborutil,
9 cborutil,
10 )
10 )
11
11
class TestCase(unittest.TestCase):
    # Python 3.7 deprecates assertRaisesRegexp in favour of
    # assertRaisesRegex, while 2.7 only has the former. Alias the old
    # name to the new one when the new one is missing so the suites below
    # can always call assertRaisesRegex.
    if not hasattr(unittest.TestCase, 'assertRaisesRegex'):
        assertRaisesRegex = (# camelcase-required
            unittest.TestCase.assertRaisesRegexp)
18
18
def loadit(it):
    """Concatenate the byte chunks of ``it`` and decode with ``cbor.loads``."""
    joined = b''.join(it)
    return cbor.loads(joined)
21
21
class BytestringTests(TestCase):
    # Encoding and decoding of CBOR bytestrings, both definite and
    # indefinite length, via cborutil.

    def testsimple(self):
        payload = b'foobar'

        emitted = list(cborutil.streamencode(payload))
        self.assertEqual(emitted, [b'\x46', b'foobar'])

        roundtripped = loadit(cborutil.streamencode(payload))
        self.assertEqual(roundtripped, b'foobar')

        single = cborutil.decodeall(b'\x46foobar')
        self.assertEqual(single, [b'foobar'])

        double = cborutil.decodeall(b'\x46foobar\x45fizbi')
        self.assertEqual(double, [b'foobar', b'fizbi'])

    def testlong(self):
        payload = b'x' * 1048576

        self.assertEqual(loadit(cborutil.streamencode(payload)), payload)

        wire = b''.join(cborutil.streamencode(payload))
        self.assertEqual(cborutil.decodeall(wire), [payload])

    def testfromiter(self):
        # This is the example from RFC 7049 Section 2.2.2.
        first = b'\xaa\xbb\xcc\xdd'
        second = b'\xee\xff\x99'
        source = [first, second]
        wire = b'\x5f\x44\xaa\xbb\xcc\xdd\x43\xee\xff\x99\xff'

        emitted = list(cborutil.streamencodebytestringfromiter(source))
        self.assertEqual(
            emitted,
            [b'\x5f', b'\x44', first, b'\x43', second, b'\xff'])

        self.assertEqual(
            loadit(cborutil.streamencodebytestringfromiter(source)),
            b''.join(source))

        self.assertEqual(cborutil.decodeall(wire), [first, second, b''])

        # Only the first chunk is flagged isfirst and only the third
        # (the empty terminator chunk) is flagged islast.
        for i, chunk in enumerate(cborutil.decodeall(wire)):
            self.assertIsInstance(chunk, cborutil.bytestringchunk)

            checkfirst = self.assertTrue if i == 0 else self.assertFalse
            checkfirst(chunk.isfirst)

            checklast = self.assertTrue if i == 2 else self.assertFalse
            checklast(chunk.islast)

    def testfromiterlarge(self):
        source = [b'a' * 16, b'b' * 128, b'c' * 1024, b'd' * 1048576]

        decoded = loadit(cborutil.streamencodebytestringfromiter(source))
        self.assertEqual(decoded, b''.join(source))

    def testindefinite(self):
        source = b'\x00\x01\x02\x03' + b'\xff' * 16384

        it = cborutil.streamencodeindefinitebytestring(source, chunksize=2)

        # Start marker, then (header, 2 byte chunk) pairs.
        leading = (
            b'\x5f',
            b'\x42',
            b'\x00\x01',
            b'\x42',
            b'\x02\x03',
            b'\x42',
            b'\xff\xff',
        )
        for expected in leading:
            self.assertEqual(next(it), expected)

        dest = b''.join(cborutil.streamencodeindefinitebytestring(
            source, chunksize=42))
        self.assertEqual(cbor.loads(dest), source)

        self.assertEqual(b''.join(cborutil.decodeall(dest)), source)

        for chunk in cborutil.decodeall(dest):
            self.assertIsInstance(chunk, cborutil.bytestringchunk)
            self.assertIn(len(chunk), (0, 8, 42))

        # An indefinite bytestring with no chunks decodes to one empty
        # chunk that is both first and last.
        empty = cborutil.decodeall(b'\x5f\xff')
        self.assertEqual(empty, [b''])
        self.assertTrue(empty[0].isfirst)
        self.assertTrue(empty[0].islast)

    def testdecodevariouslengths(self):
        # (exclusive payload length limit, header length in bytes)
        headersizes = ((24, 1), (256, 2), (65536, 3), (1048576, 5))

        for i in (0, 1, 22, 23, 24, 25, 254, 255, 256, 65534, 65535, 65536):
            source = b'x' * i
            encoded = b''.join(cborutil.streamencode(source))

            hlen = next(size for limit, size in headersizes
                        if len(source) < limit)

            self.assertEqual(cborutil.decodeitem(encoded),
                             (True, source, hlen + len(source),
                              cborutil.SPECIAL_NONE))

    def testpartialdecode(self):
        none = cborutil.SPECIAL_NONE
        encoded = b''.join(cborutil.streamencode(b'foobar'))

        # With 1..6 of the 7 encoded bytes the item is incomplete and the
        # number of missing bytes is reported as a negative count.
        for have in range(1, 7):
            self.assertEqual(cborutil.decodeitem(encoded[0:have]),
                             (False, None, have - 7, none))

        self.assertEqual(cborutil.decodeitem(encoded[0:7]),
                         (True, b'foobar', 7, none))

    def testpartialdecodevariouslengths(self):
        none = cborutil.SPECIAL_NONE

        lens = [
            2,
            3,
            10,
            23,
            24,
            25,
            31,
            100,
            254,
            255,
            256,
            257,
            16384,
            65534,
            65535,
            65536,
            65537,
            131071,
            131072,
            131073,
            1048575,
            1048576,
            1048577,
        ]

        # (exclusive payload size limit, header length in bytes)
        headersizes = ((24, 1), (2**8, 2), (2**16, 3), (2**32, 5))

        for size in lens:
            for limit, hlen in headersizes:
                if size < limit:
                    break
            else:
                assert False

            source = b'x' * size
            encoded = b''.join(cborutil.streamencode(source))

            res = cborutil.decodeitem(encoded[0:1])

            # A multi-byte header reports the rest of the header as
            # missing; a 1 byte header already knows the payload size.
            if hlen > 1:
                missing = -(hlen - 1)
            else:
                missing = -(size + hlen - 1)
            self.assertEqual(res, (False, None, missing, none))

            # Decoding partial header reports remaining header size.
            for i in range(hlen - 1):
                self.assertEqual(cborutil.decodeitem(encoded[0:i + 1]),
                                 (False, None, -(hlen - i - 1), none))

            # Decoding complete header reports item size.
            self.assertEqual(cborutil.decodeitem(encoded[0:hlen]),
                             (False, None, -size, none))

            # Decoding single byte after header reports item size - 1
            self.assertEqual(cborutil.decodeitem(encoded[0:hlen + 1]),
                             (False, None, -(size - 1), none))

            # Decoding all but the last byte reports -1 needed.
            self.assertEqual(
                cborutil.decodeitem(encoded[0:hlen + size - 1]),
                (False, None, -1, none))

            # Decoding last byte retrieves value.
            self.assertEqual(cborutil.decodeitem(encoded[0:hlen + size]),
                             (True, source, hlen + size, none))

    def testindefinitepartialdecode(self):
        none = cborutil.SPECIAL_NONE
        encoded = b''.join(cborutil.streamencodebytestringfromiter(
            [b'foobar', b'biz']))

        # First item should be begin of bytestring special.
        self.assertEqual(
            cborutil.decodeitem(encoded[0:1]),
            (True, None, 1, cborutil.SPECIAL_START_INDEFINITE_BYTESTRING))

        # Second item should be the first chunk. But only available when
        # we give it 7 bytes (1 byte header + 6 byte chunk).
        for end in range(2, 8):
            self.assertEqual(cborutil.decodeitem(encoded[1:end]),
                             (False, None, end - 8, none))

        self.assertEqual(cborutil.decodeitem(encoded[1:8]),
                         (True, b'foobar', 7, none))

        # Third item should be second chunk. But only available when
        # we give it 4 bytes (1 byte header + 3 byte chunk).
        for end in range(9, 12):
            self.assertEqual(cborutil.decodeitem(encoded[8:end]),
                             (False, None, end - 12, none))

        self.assertEqual(cborutil.decodeitem(encoded[8:12]),
                         (True, b'biz', 4, none))

        # Fourth item should be end of indefinite stream marker.
        self.assertEqual(cborutil.decodeitem(encoded[12:13]),
                         (True, None, 1, cborutil.SPECIAL_INDEFINITE_BREAK))

        # Now test the behavior when going through the decoder.
        # Each row feeds encoded[0:end] to a brand new decoder.
        expectations = (
            (1, (False, 1, 0)),
            (2, (False, 1, 6)),
            (3, (False, 1, 5)),
            (4, (False, 1, 4)),
            (5, (False, 1, 3)),
            (6, (False, 1, 2)),
            (7, (False, 1, 1)),
            (8, (True, 8, 0)),
            (9, (True, 8, 3)),
            (10, (True, 8, 2)),
            (11, (True, 8, 1)),
            (12, (True, 12, 0)),
            (13, (True, 13, 0)),
        )
        for end, expected in expectations:
            fresh = cborutil.sansiodecoder()
            self.assertEqual(fresh.decode(encoded[0:end]), expected)

        # Finally feed a single decoder piecewise and inspect the chunk
        # flags of what it hands back after each piece.
        decoder = cborutil.sansiodecoder()
        decoder.decode(encoded[0:8])
        values = decoder.getavailable()
        self.assertEqual(values, [b'foobar'])
        self.assertTrue(values[0].isfirst)
        self.assertFalse(values[0].islast)

        self.assertEqual(decoder.decode(encoded[8:12]), (True, 4, 0))
        values = decoder.getavailable()
        self.assertEqual(values, [b'biz'])
        self.assertFalse(values[0].isfirst)
        self.assertFalse(values[0].islast)

        self.assertEqual(decoder.decode(encoded[12:]), (True, 1, 0))
        values = decoder.getavailable()
        self.assertEqual(values, [b''])
        self.assertFalse(values[0].isfirst)
        self.assertTrue(values[0].islast)
323
323
class StringTests(TestCase):
    def testdecodeforbidden(self):
        # b'\x63foo' is the input that decodeall must reject with the
        # "string major type not supported" error.
        expectfailure = self.assertRaisesRegex(
            cborutil.CBORDecodeError, 'string major type not supported')

        with expectfailure:
            cborutil.decodeall(b'\x63foo')
330
330
331 class IntTests(TestCase):
331 class IntTests(TestCase):
def testsmall(self):
    # The integers 0..4 each encode to, and decode from, one byte.
    cases = (
        (0, b'\x00'),
        (1, b'\x01'),
        (2, b'\x02'),
        (3, b'\x03'),
        (4, b'\x04'),
    )
    for value, wire in cases:
        self.assertEqual(list(cborutil.streamencode(value)), [wire])
        self.assertEqual(cborutil.decodeall(wire), [value])

    # Multiple value decode works.
    self.assertEqual(cborutil.decodeall(b'\x00\x01\x02\x03\x04'),
                     [0, 1, 2, 3, 4])
351
351
def testnegativesmall(self):
    # The integers -1..-5 each encode to, and decode from, one byte.
    cases = (
        (-1, b'\x20'),
        (-2, b'\x21'),
        (-3, b'\x22'),
        (-4, b'\x23'),
        (-5, b'\x24'),
    )
    for value, wire in cases:
        self.assertEqual(list(cborutil.streamencode(value)), [wire])
        self.assertEqual(cborutil.decodeall(wire), [value])

    # Multiple value decode works.
    self.assertEqual(cborutil.decodeall(b'\x20\x21\x22\x23\x24'),
                     [-1, -2, -3, -4, -5])
371
371
def testrange(self):
    # Compare cborutil with the reference cbor encoder across a sweep of
    # integers, and check each encoding decodes back to its value.
    for value in range(-70000, 70000, 10):
        wire = b''.join(cborutil.streamencode(value))

        self.assertEqual(wire, cbor.dumps(value))
        self.assertEqual(cborutil.decodeall(wire), [value])
378
378
def testdecodepartialubyte(self):
    none = cborutil.SPECIAL_NONE
    encoded = b''.join(cborutil.streamencode(250))

    # The first byte alone leaves one byte outstanding.
    partial = cborutil.decodeitem(encoded[0:1])
    self.assertEqual(partial, (False, None, -1, none))

    complete = cborutil.decodeitem(encoded[0:2])
    self.assertEqual(complete, (True, 250, 2, none))
386
386
def testdecodepartialbyte(self):
    none = cborutil.SPECIAL_NONE
    encoded = b''.join(cborutil.streamencode(-42))

    # The first byte alone leaves one byte outstanding.
    partial = cborutil.decodeitem(encoded[0:1])
    self.assertEqual(partial, (False, None, -1, none))

    complete = cborutil.decodeitem(encoded[0:2])
    self.assertEqual(complete, (True, -42, 2, none))
393
393
def testdecodepartialushort(self):
    # 2**15 is asserted below to occupy 3 bytes in total. With only 1 or
    # 2 of them available, decodeitem must report how many are missing.
    encoded = b''.join(cborutil.streamencode(2**15))

    self.assertEqual(cborutil.decodeitem(encoded[0:1]),
                     (False, None, -2, cborutil.SPECIAL_NONE))
    self.assertEqual(cborutil.decodeitem(encoded[0:2]),
                     (False, None, -1, cborutil.SPECIAL_NONE))
    # Slice exactly the 3 bytes the expected read count refers to. The
    # previous bound of 5 only worked because slicing clamps to the
    # buffer length, and it disagreed with testdecodepartialshort.
    self.assertEqual(cborutil.decodeitem(encoded[0:3]),
                     (True, 2**15, 3, cborutil.SPECIAL_NONE))
403
403
def testdecodepartialshort(self):
    none = cborutil.SPECIAL_NONE
    encoded = b''.join(cborutil.streamencode(-1024))

    # 1 or 2 of the 3 encoded bytes: the shortfall is reported as a
    # negative count.
    for have in (1, 2):
        self.assertEqual(cborutil.decodeitem(encoded[0:have]),
                         (False, None, have - 3, none))

    self.assertEqual(cborutil.decodeitem(encoded[0:3]),
                     (True, -1024, 3, none))
413
413
def testdecodepartialulong(self):
    none = cborutil.SPECIAL_NONE
    encoded = b''.join(cborutil.streamencode(2**28))

    # 1..4 of the 5 encoded bytes: the shortfall is reported as a
    # negative count.
    for have in range(1, 5):
        self.assertEqual(cborutil.decodeitem(encoded[0:have]),
                         (False, None, have - 5, none))

    self.assertEqual(cborutil.decodeitem(encoded[0:5]),
                     (True, 2**28, 5, none))
427
427
def testdecodepartiallong(self):
    none = cborutil.SPECIAL_NONE
    encoded = b''.join(cborutil.streamencode(-1048580))

    # 1..4 of the 5 encoded bytes: the shortfall is reported as a
    # negative count.
    for have in range(1, 5):
        self.assertEqual(cborutil.decodeitem(encoded[0:have]),
                         (False, None, have - 5, none))

    self.assertEqual(cborutil.decodeitem(encoded[0:5]),
                     (True, -1048580, 5, none))
441
441
def testdecodepartialulonglong(self):
    none = cborutil.SPECIAL_NONE
    encoded = b''.join(cborutil.streamencode(2**32))

    # 1..8 of the 9 encoded bytes: the shortfall is reported as a
    # negative count.
    for have in range(1, 9):
        self.assertEqual(cborutil.decodeitem(encoded[0:have]),
                         (False, None, have - 9, none))

    self.assertEqual(cborutil.decodeitem(encoded[0:9]),
                     (True, 2**32, 9, none))

    # decodeall refuses truncated input outright.
    for have in (1, 2):
        with self.assertRaisesRegex(
            cborutil.CBORDecodeError, 'input data not fully consumed'):
            cborutil.decodeall(encoded[0:have])
471
471
def testdecodepartiallonglong(self):
    # NOTE(review): this is a method of the integer test class whose header
    # lies before this chunk -- confirm placement when applying.
    #
    # The first 9 encoded bytes of -7000000000 decode completely. Every
    # shorter prefix must be reported as incomplete, with the third tuple
    # element climbing from -8 to -1 as more bytes become available.
    payload = b''.join(cborutil.streamencode(-7000000000))

    for available in range(1, 9):
        self.assertEqual(
            cborutil.decodeitem(payload[0:available]),
            (False, None, available - 9, cborutil.SPECIAL_NONE))

    # With 9 bytes present the value is returned and the third element is 9.
    self.assertEqual(cborutil.decodeitem(payload[0:9]),
                     (True, -7000000000, 9, cborutil.SPECIAL_NONE))
493
493
class ArrayTests(TestCase):
    """Encoding, decoding and partial-decode checks for CBOR arrays."""

    def testempty(self):
        # An empty list is emitted as the single chunk 0x80.
        emptyarray = b'\x80'

        self.assertEqual(list(cborutil.streamencode([])), [emptyarray])
        self.assertEqual(loadit(cborutil.streamencode([])), [])

        self.assertEqual(cborutil.decodeall(emptyarray), [[]])

    def testbasic(self):
        values = [b'foo', b'bar', 1, -10]

        # One header chunk followed by the chunks of each element in order.
        expected = [
            b'\x84',
            b'\x43', b'foo',
            b'\x43', b'bar',
            b'\x01',
            b'\x29',
        ]

        self.assertEqual(list(cborutil.streamencode(values)), expected)

        self.assertEqual(cborutil.decodeall(b''.join(expected)), [values])

    def testemptyfromiter(self):
        # The iterator-based encoder emits these two bytes for an empty
        # input...
        indefinite = b'\x9f\xff'

        self.assertEqual(b''.join(cborutil.streamencodearrayfromiter([])),
                         indefinite)

        # ...and decodeall() refuses to decode them.
        with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                    'indefinite length uint not allowed'):
            cborutil.decodeall(indefinite)

    def testfromiter1(self):
        values = [b'foo']

        chunks = list(cborutil.streamencodearrayfromiter(values))
        self.assertEqual(chunks, [
            b'\x9f',
            b'\x43', b'foo',
            b'\xff',
        ])

        # The reference cbor library can load the output; our decoder
        # rejects it.
        encoded = b''.join(cborutil.streamencodearrayfromiter(values))
        self.assertEqual(cbor.loads(encoded), values)

        with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                    'indefinite length uint not allowed'):
            cborutil.decodeall(encoded)

    def testtuple(self):
        # Tuples are expected to come back as lists from both decoders.
        values = (b'foo', None, 42)
        aslist = list(values)
        encoded = b''.join(cborutil.streamencode(values))

        self.assertEqual(cbor.loads(encoded), aslist)

        self.assertEqual(cborutil.decodeall(encoded), [aslist])

    def testpartialdecode(self):
        nospecial = cborutil.SPECIAL_NONE
        startarray = cborutil.SPECIAL_START_ARRAY

        # Each entry: (array length,
        #              [(prefix length, third element while incomplete)],
        #              third element once the header decodes,
        #              prefix lengths at which the header decodes).
        cases = [
            (4, [], 1, (1, 2)),
            (23, [], 1, (1, 2)),
            (24, [(1, -1)], 2, (2, 3)),
            (256, [(1, -2), (2, -1)], 3, (3, 4)),
        ]

        for length, partial, headerlen, complete in cases:
            encoded = b''.join(cborutil.streamencode(list(range(length))))

            for end, remaining in partial:
                self.assertEqual(cborutil.decodeitem(encoded[0:end]),
                                 (False, None, remaining, nospecial))

            for end in complete:
                self.assertEqual(cborutil.decodeitem(encoded[0:end]),
                                 (True, length, headerlen, startarray))

    def testnested(self):
        sources = [
            [[], [], [[], [], []]],
            [True, None, [True, 0, 2], [None], [], [[[]], -87]],
            # A set within an array.
            [None, {b'foo', b'bar', None, False}, set()],
            # A map within an array.
            [None, {}, {b'foo': b'bar', True: False}, [{}]],
        ]

        for source in sources:
            encoded = b''.join(cborutil.streamencode(source))
            self.assertEqual(cborutil.decodeall(encoded), [source])

    def testindefinitebytestringvalues(self):
        # Single value array whose value is an empty indefinite bytestring.
        payload = b'\x81\x5f\x40\xff'

        with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                    'indefinite length bytestrings not '
                                    'allowed as array values'):
            cborutil.decodeall(payload)
605
605
class SetTests(TestCase):
    """Encoding, decoding and validation checks for CBOR-encoded sets."""

    def testempty(self):
        # An empty set is emitted as a 3-byte semantic tag chunk followed by
        # an empty array chunk.
        chunks = list(cborutil.streamencode(set()))
        self.assertEqual(chunks, [
            b'\xd9\x01\x02',
            b'\x80',
        ])

        self.assertEqual(cborutil.decodeall(b'\xd9\x01\x02\x80'), [set()])

    def testset(self):
        members = {b'foo', None, 42}
        encoded = b''.join(cborutil.streamencode(members))

        self.assertEqual(cbor.loads(encoded), members)

        self.assertEqual(cborutil.decodeall(encoded), [members])

    def testinvalidtag(self):
        # Must use array to encode sets.
        payload = b'\xd9\x01\x02\xa0'

        with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                    'expected array after finite set '
                                    'semantic tag'):
            cborutil.decodeall(payload)

    def testpartialdecode(self):
        # Semantic tag item will be 3 bytes. Set header will be variable
        # depending on length.
        nospecial = cborutil.SPECIAL_NONE
        startset = cborutil.SPECIAL_START_SET

        # Each entry: (set size,
        #              [(prefix length, third element while incomplete)],
        #              third element once the header decodes,
        #              prefix lengths at which the header decodes).
        cases = [
            (23, [(1, -2), (2, -1), (3, -1)], 4, (4, 5)),
            (24, [(1, -2), (2, -1), (3, -1), (4, -1)], 5, (5, 6)),
            (256, [(1, -2), (2, -1), (3, -1), (4, -2), (5, -1)], 6, (6,)),
        ]

        for size, partial, headerlen, complete in cases:
            encoded = b''.join(cborutil.streamencode(set(range(size))))

            for end, remaining in partial:
                self.assertEqual(cborutil.decodeitem(encoded[0:end]),
                                 (False, None, remaining, nospecial))

            for end in complete:
                self.assertEqual(cborutil.decodeitem(encoded[0:end]),
                                 (True, size, headerlen, startset))

    def testinvalidvalue(self):
        settag = b'\xd9\x01\x02'  # semantic tag
        onemember = b'\x81'  # array of size 1
        nocollections = 'collections not allowed as set values'

        # Each entry: (encoded set member, expected error pattern).
        cases = [
            # indefinite length bytestring "foo"
            (b'\x5f\x43foo\xff',
             'indefinite length bytestrings not '
             'allowed as set values'),
            # empty array
            (b'\x80', nocollections),
            # empty map
            (b'\xa0', nocollections),
            # set with integer 1
            (b'\xd9\x01\x02\x81\x01', nocollections),
        ]

        for member, message in cases:
            encoded = b''.join([settag, onemember, member])

            with self.assertRaisesRegex(cborutil.CBORDecodeError, message):
                cborutil.decodeall(encoded)
716
716
class BoolTests(TestCase):
    """Encoding and decoding checks for booleans."""

    def testbasic(self):
        # True is emitted as 0xf5 and False as 0xf4.
        truebyte = b'\xf5'
        falsebyte = b'\xf4'

        self.assertEqual(list(cborutil.streamencode(True)), [truebyte])
        self.assertEqual(list(cborutil.streamencode(False)), [falsebyte])

        # The loaded values must be the bool singletons themselves.
        self.assertIs(loadit(cborutil.streamencode(True)), True)
        self.assertIs(loadit(cborutil.streamencode(False)), False)

        self.assertEqual(cborutil.decodeall(falsebyte), [False])
        self.assertEqual(cborutil.decodeall(truebyte), [True])

        # Several consecutive values decode to one list entry each.
        stream = b'\xf4\xf5\xf5\xf4'
        self.assertEqual(cborutil.decodeall(stream),
                         [False, True, True, False])
730
730
class NoneTests(TestCase):
    """Encoding and decoding checks for None."""

    def testbasic(self):
        # None is emitted as the single chunk 0xf6.
        nonebyte = b'\xf6'

        self.assertEqual(list(cborutil.streamencode(None)), [nonebyte])

        self.assertIs(loadit(cborutil.streamencode(None)), None)

        self.assertEqual(cborutil.decodeall(nonebyte), [None])
        self.assertEqual(cborutil.decodeall(b'\xf6\xf6'), [None, None])
739
739
class MapTests(TestCase):
    """Encoding, decoding and validation checks for CBOR maps."""

    def testempty(self):
        # An empty dict is emitted as the single chunk 0xa0.
        self.assertEqual(list(cborutil.streamencode({})), [b'\xa0'])
        self.assertEqual(loadit(cborutil.streamencode({})), {})

        self.assertEqual(cborutil.decodeall(b'\xa0'), [{}])

    def testemptyindefinite(self):
        # The iterator-based encoder emits a start chunk and an end chunk.
        self.assertEqual(list(cborutil.streamencodemapfromiter([])), [
            b'\xbf', b'\xff'])

        self.assertEqual(loadit(cborutil.streamencodemapfromiter([])), {})

        # decodeall() refuses to decode that output.
        with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                    'indefinite length uint not allowed'):
            cborutil.decodeall(b'\xbf\xff')

    def testone(self):
        source = {b'foo': b'bar'}
        self.assertEqual(list(cborutil.streamencode(source)), [
            b'\xa1', b'\x43', b'foo', b'\x43', b'bar'])

        self.assertEqual(loadit(cborutil.streamencode(source)), source)

        self.assertEqual(cborutil.decodeall(b'\xa1\x43foo\x43bar'), [source])

    def testmultiple(self):
        source = {
            b'foo': b'bar',
            b'baz': b'value1',
        }

        self.assertEqual(loadit(cborutil.streamencode(source)), source)

        self.assertEqual(
            loadit(cborutil.streamencodemapfromiter(source.items())),
            source)

        encoded = b''.join(cborutil.streamencode(source))
        self.assertEqual(cborutil.decodeall(encoded), [source])

    def testcomplex(self):
        # Mixed key types (bytes and int) and negative values.
        source = {
            b'key': 1,
            2: -10,
        }

        self.assertEqual(loadit(cborutil.streamencode(source)),
                         source)

        self.assertEqual(
            loadit(cborutil.streamencodemapfromiter(source.items())),
            source)

        encoded = b''.join(cborutil.streamencode(source))
        self.assertEqual(cborutil.decodeall(encoded), [source])

    def testnested(self):
        # Maps as map values.
        source = {b'key1': None, b'key2': {b'sub1': b'sub2'}, b'sub2': {}}
        encoded = b''.join(cborutil.streamencode(source))

        self.assertEqual(cborutil.decodeall(encoded), [source])

        # Arrays, sets and maps as map values.
        source = {
            b'key1': [],
            b'key2': [None, False],
            b'key3': {b'foo', b'bar'},
            b'key4': {},
        }
        encoded = b''.join(cborutil.streamencode(source))
        self.assertEqual(cborutil.decodeall(encoded), [source])

    def testillegalkey(self):
        encoded = b''.join([
            # map header + len 1
            b'\xa1',
            # indefinite length bytestring "foo" in key position
            # (0x43 is the header of a 3-byte bytestring chunk, matching
            # the encoding used for b'foo' elsewhere in these tests)
            b'\x5f\x43foo\xff'
        ])

        with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                    'indefinite length bytestrings not '
                                    'allowed as map keys'):
            cborutil.decodeall(encoded)

        encoded = b''.join([
            b'\xa1',
            b'\x80',  # empty array
            b'\x43foo',
        ])

        with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                    'collections not supported as map keys'):
            cborutil.decodeall(encoded)

    def testillegalvalue(self):
        encoded = b''.join([
            b'\xa1',  # map headers
            b'\x43foo',  # key
            b'\x5f\x43bar\xff',  # indefinite length value
        ])

        with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                    'indefinite length bytestrings not '
                                    'allowed as map values'):
            cborutil.decodeall(encoded)

    def testpartialdecode(self):
        # For each map size, feed growing prefixes of the encoding to
        # decodeitem(). Prefixes too short for the map header yield
        # (False, None, <negative>, SPECIAL_NONE); once the header is
        # available the result is (True, <size>, <n>, SPECIAL_START_MAP)
        # and stays the same when further bytes are supplied.
        source = {b'key1': b'value1'}
        encoded = b''.join(cborutil.streamencode(source))

        self.assertEqual(cborutil.decodeitem(encoded[0:1]),
                         (True, 1, 1, cborutil.SPECIAL_START_MAP))
        self.assertEqual(cborutil.decodeitem(encoded[0:2]),
                         (True, 1, 1, cborutil.SPECIAL_START_MAP))

        source = {b'key%d' % i: None for i in range(23)}
        encoded = b''.join(cborutil.streamencode(source))
        self.assertEqual(cborutil.decodeitem(encoded[0:1]),
                         (True, 23, 1, cborutil.SPECIAL_START_MAP))

        source = {b'key%d' % i: None for i in range(24)}
        encoded = b''.join(cborutil.streamencode(source))
        self.assertEqual(cborutil.decodeitem(encoded[0:1]),
                         (False, None, -1, cborutil.SPECIAL_NONE))
        self.assertEqual(cborutil.decodeitem(encoded[0:2]),
                         (True, 24, 2, cborutil.SPECIAL_START_MAP))
        self.assertEqual(cborutil.decodeitem(encoded[0:3]),
                         (True, 24, 2, cborutil.SPECIAL_START_MAP))

        source = {b'key%d' % i: None for i in range(256)}
        encoded = b''.join(cborutil.streamencode(source))
        self.assertEqual(cborutil.decodeitem(encoded[0:1]),
                         (False, None, -2, cborutil.SPECIAL_NONE))
        self.assertEqual(cborutil.decodeitem(encoded[0:2]),
                         (False, None, -1, cborutil.SPECIAL_NONE))
        self.assertEqual(cborutil.decodeitem(encoded[0:3]),
                         (True, 256, 3, cborutil.SPECIAL_START_MAP))
        self.assertEqual(cborutil.decodeitem(encoded[0:4]),
                         (True, 256, 3, cborutil.SPECIAL_START_MAP))

        source = {b'key%d' % i: None for i in range(65536)}
        encoded = b''.join(cborutil.streamencode(source))
        self.assertEqual(cborutil.decodeitem(encoded[0:1]),
                         (False, None, -4, cborutil.SPECIAL_NONE))
        self.assertEqual(cborutil.decodeitem(encoded[0:2]),
                         (False, None, -3, cborutil.SPECIAL_NONE))
        self.assertEqual(cborutil.decodeitem(encoded[0:3]),
                         (False, None, -2, cborutil.SPECIAL_NONE))
        self.assertEqual(cborutil.decodeitem(encoded[0:4]),
                         (False, None, -1, cborutil.SPECIAL_NONE))
        self.assertEqual(cborutil.decodeitem(encoded[0:5]),
                         (True, 65536, 5, cborutil.SPECIAL_START_MAP))
        self.assertEqual(cborutil.decodeitem(encoded[0:6]),
                         (True, 65536, 5, cborutil.SPECIAL_START_MAP))
895
895
class SemanticTagTests(TestCase):
    """Checks that semantic tags other than the finite-set tag are refused."""

    def testdecodeforbidden(self):
        for i in range(500):
            # The finite set tag is the one tag skipped by this test.
            if i == cborutil.SEMANTIC_TAG_FINITE_SET:
                continue

            tag = cborutil.encodelength(cborutil.MAJOR_TYPE_SEMANTIC, i)

            # The tagged item is the unsigned integer 42.
            encoded = tag + cborutil.encodelength(cborutil.MAJOR_TYPE_UINT, 42)

            # Partial decode is incomplete. Tags below 24 get no
            # partial-decode check.
            if 24 <= i < 256:
                self.assertEqual(cborutil.decodeitem(encoded[0:1]),
                                 (False, None, -1, cborutil.SPECIAL_NONE))
            elif 256 <= i < 65536:
                self.assertEqual(cborutil.decodeitem(encoded[0:1]),
                                 (False, None, -2, cborutil.SPECIAL_NONE))
                self.assertEqual(cborutil.decodeitem(encoded[0:2]),
                                 (False, None, -1, cborutil.SPECIAL_NONE))

            # Raw string: "\d" is a regex escape, not a string escape.
            with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                        r'semantic tag \d+ not allowed'):
                cborutil.decodeitem(encoded)
922
922
class SpecialTypesTests(TestCase):
    """Checks that decodeitem() refuses unsupported special-type values."""

    def testforbiddentypes(self):
        """Every value 0..255 other than false/true/null must raise.

        Each value is encoded with encodelength() under MAJOR_TYPE_SPECIAL
        and the result is handed to decodeitem(), which is expected to raise
        CBORDecodeError.
        """
        accepted = (
            cborutil.SUBTYPE_FALSE,
            cborutil.SUBTYPE_TRUE,
            cborutil.SUBTYPE_NULL,
        )

        for i in range(256):
            if i in accepted:
                continue

            encoded = cborutil.encodelength(cborutil.MAJOR_TYPE_SPECIAL, i)

            # Raw string: ``\d`` is a regex class, not a str escape sequence.
            with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                        r'special type \d+ not allowed'):
                cborutil.decodeitem(encoded)
938
938
class SansIODecoderTests(TestCase):
    """Tests for cborutil.sansiodecoder."""

    def testemptyinput(self):
        """Decoding an empty byte string yields (False, 0, 0)."""
        result = cborutil.sansiodecoder().decode(b'')
        self.assertEqual(result, (False, 0, 0))
943
943
class BufferingDecoderTests(TestCase):
    """Tests for cborutil.bufferingdecoder."""

    def testsimple(self):
        """Feed one encoded value in fixed-size chunks of 1..31 bytes.

        Whatever the chunk size, the decoder must end up with exactly the
        original value available.
        """
        source = [
            b'foobar',
            b'x' * 128,
            {b'foo': b'bar'},
            True,
            False,
            None,
            [None] * 128,
        ]

        encoded = b''.join(cborutil.streamencode(source))
        total = len(encoded)

        for step in range(1, 32):
            decoder = cborutil.bufferingdecoder()

            # Walk the encoded data in ``step``-sized slices; the last slice
            # may be shorter.
            for offset in range(0, total, step):
                decoder.decode(encoded[offset:offset + step])

            self.assertEqual(decoder.getavailable(), [source])

    def testbytearray(self):
        """A bytearray input decodes to the same value as bytes would."""
        payload = bytearray(b''.join(cborutil.streamencode(b'foobar')))

        decoder = cborutil.bufferingdecoder()
        decoder.decode(payload)

        self.assertEqual(decoder.getavailable(), [b'foobar'])
975
class DecodeallTests(TestCase):
    """Tests for cborutil.decodeall."""

    def testemptyinput(self):
        """No input bytes means no decoded values."""
        decoded = cborutil.decodeall(b'')
        self.assertEqual(decoded, [])

    def testpartialinput(self):
        """An array header promising more elements than supplied must fail."""
        pieces = [
            b'\x82',  # array of 2 elements
            b'\x01',  # integer 1
        ]
        encoded = b''.join(pieces)

        with self.assertRaisesRegex(cborutil.CBORDecodeError,
                                    'input data not complete'):
            cborutil.decodeall(encoded)
981
989
if __name__ == '__main__':
    # Imported only when run as a script; hands this module's name to
    # silenttestrunner.main().
    import silenttestrunner as runner
    runner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now