upstream/mercurial-mirror Commit - r40066:62160d30

1

# cborutil.py - CBOR extensions

1

# cborutil.py - CBOR extensions

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import struct

10

import struct

11

import sys

11

import sys

12

13

from .. import pycompat

13

from .. import pycompat

14

15

# Very short version of RFC 7049...

15

# Very short version of RFC 7049...

16

#

16

#

17

# Each item begins with a byte. The 3 high bits of that byte denote the

17

# Each item begins with a byte. The 3 high bits of that byte denote the

18

# "major type." The lower 5 bits denote the "subtype." Each major type

18

# "major type." The lower 5 bits denote the "subtype." Each major type

19

# has its own encoding mechanism.

19

# has its own encoding mechanism.

20

#

20

#

21

# Most types have lengths. However, bytestring, string, array, and map

21

# Most types have lengths. However, bytestring, string, array, and map

22

# can be indefinite length. These are denoted by a subtype with value 31.

22

# can be indefinite length. These are denoted by a subtype with value 31.

23

# Sub-components of those types then come afterwards and are terminated

23

# Sub-components of those types then come afterwards and are terminated

24

# by a "break" byte.

24

# by a "break" byte.

25

26

# Major types: the value stored in the 3 high bits of an item's initial byte.
MAJOR_TYPE_UINT = 0
MAJOR_TYPE_NEGINT = 1
MAJOR_TYPE_BYTESTRING = 2
MAJOR_TYPE_STRING = 3
MAJOR_TYPE_ARRAY = 4
MAJOR_TYPE_MAP = 5
MAJOR_TYPE_SEMANTIC = 6
MAJOR_TYPE_SPECIAL = 7

# Selects the 5 low bits (the "subtype") of an item's initial byte.
SUBTYPE_MASK = 0x1f

# Subtype values that carry a fixed meaning.
SUBTYPE_FALSE = 20
SUBTYPE_TRUE = 21
SUBTYPE_NULL = 22
SUBTYPE_HALF_FLOAT = 25
SUBTYPE_SINGLE_FLOAT = 26
SUBTYPE_DOUBLE_FLOAT = 27
# Marks an item as indefinite length.
SUBTYPE_INDEFINITE = 31

# Semantic tag value identifying a finite set.
SEMANTIC_TAG_FINITE_SET = 258

46

47

# Start markers for indefinite length items: one byte holding the major
# type in its 3 high bits ORed with the indefinite subtype (31).
BEGIN_INDEFINITE_BYTESTRING = struct.pack(
    r'>B',
    (MAJOR_TYPE_BYTESTRING << 5) | SUBTYPE_INDEFINITE,
)
BEGIN_INDEFINITE_ARRAY = struct.pack(
    r'>B',
    (MAJOR_TYPE_ARRAY << 5) | SUBTYPE_INDEFINITE,
)
BEGIN_INDEFINITE_MAP = struct.pack(
    r'>B',
    (MAJOR_TYPE_MAP << 5) | SUBTYPE_INDEFINITE,
)

54

55

# Pre-compiled packers for item headers. The first field is always the
# initial byte; the second field, when present, is a big-endian unsigned
# integer of 1, 2, 4 or 8 bytes.
ENCODED_LENGTH_1 = struct.Struct(r'>B')   # initial byte only
ENCODED_LENGTH_2 = struct.Struct(r'>BB')  # initial byte + 1 byte
ENCODED_LENGTH_3 = struct.Struct(r'>BH')  # initial byte + 2 bytes
ENCODED_LENGTH_4 = struct.Struct(r'>BL')  # initial byte + 4 bytes
ENCODED_LENGTH_5 = struct.Struct(r'>BQ')  # initial byte + 8 bytes

# The break terminates an indefinite length item. It is provided both as
# a bytes value and as the equivalent integer.
BREAK = b'\xff'
BREAK_INT = 0xff

64

65

def encodelength(majortype, length):
    """Return the packed item header for ``majortype`` and ``length``.

    The major type goes in the 3 high bits of the initial byte. A length
    below 24 is stored directly in the 5 low bits. Larger lengths use
    subtype 24, 25, 26 or 27 in the low bits, followed by the length packed
    with the matching ``ENCODED_LENGTH_*`` struct.
    """
    header = majortype << 5

    if length < 24:
        return ENCODED_LENGTH_1.pack(header | length)
    if length < 256:
        return ENCODED_LENGTH_2.pack(header | 24, length)
    if length < 65536:
        return ENCODED_LENGTH_3.pack(header | 25, length)
    if length < 4294967296:
        return ENCODED_LENGTH_4.pack(header | 26, length)

    return ENCODED_LENGTH_5.pack(header | 27, length)

77

78

def streamencodebytestring(v):
    """Emit ``v`` as a definite length bytestring.

    Yields the header encoding ``len(v)`` and then ``v`` itself.
    """
    header = encodelength(MAJOR_TYPE_BYTESTRING, len(v))
    yield header
    yield v

81

82

def streamencodebytestringfromiter(it):
    """Emit an indefinite length bytestring built from the chunks in ``it``.

    ``it`` is any iterable. Each element is emitted as its own definite
    length bytestring between the indefinite start marker and the break.
    """
    yield BEGIN_INDEFINITE_BYTESTRING

    for segment in it:
        size = len(segment)
        yield encodelength(MAJOR_TYPE_BYTESTRING, size)
        yield segment

    yield BREAK

95

96

def streamencodeindefinitebytestring(source, chunksize=65536):
    """Given a large source buffer, emit as an indefinite length bytestring.

    This is a generator of chunks constituting the encoded CBOR data.
    ``source`` is sliced into pieces of at most ``chunksize`` bytes and each
    piece is emitted as a definite length bytestring between the indefinite
    start marker and the break. An empty ``source`` still produces a single
    zero-length piece.

    Raises ``ValueError`` (when iteration starts) if ``chunksize`` is not
    positive: such a value cannot make progress through a non-empty
    ``source`` and would otherwise make this generator yield forever.
    """
    if chunksize <= 0:
        raise ValueError('chunksize must be a positive integer')

    yield BEGIN_INDEFINITE_BYTESTRING

    i = 0
    l = len(source)

    while True:
        chunk = source[i:i + chunksize]
        i += len(chunk)

        yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
        yield chunk

        # Checked after emitting so that an empty source still yields one
        # (empty) chunk.
        if i >= l:
            break

    yield BREAK

117

118

def streamencodeint(v):
    """Emit the encoding of integer ``v``.

    Non-negative values use the unsigned integer major type. Negative
    values use the negative integer major type and are stored as
    ``abs(v) - 1``.

    Raises ``ValueError`` if ``v`` is outside the range
    ``-2**64 <= v < 2**64``.
    """
    if v < -18446744073709551616 or v >= 18446744073709551616:
        raise ValueError('big integers not supported')

    if v < 0:
        yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
    else:
        yield encodelength(MAJOR_TYPE_UINT, v)

126

127

def streamencodearray(l):
    """Emit a definite length array for the sized iterable ``l``.

    The header carries ``len(l)``; each element is then encoded in
    iteration order via ``streamencode()``.
    """
    yield encodelength(MAJOR_TYPE_ARRAY, len(l))

    for item in l:
        for encoded in streamencode(item):
            yield encoded

135

136

def streamencodearrayfromiter(it):
    """Emit an indefinite length array holding the items produced by ``it``.

    Each item is encoded via ``streamencode()`` between the indefinite
    array start marker and the break.
    """
    yield BEGIN_INDEFINITE_ARRAY

    for item in it:
        for encoded in streamencode(item):
            yield encoded

    yield BREAK

146

147

def _mixedtypesortkey(v):
    """Return a sort key that orders ``v`` by type name, then by value."""
    typename = type(v).__name__
    return typename, v

149

150

def streamencodeset(s):
    """Emit ``s`` as a tagged array of its sorted members."""
    # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
    # semantic tag 258 for finite sets.
    yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)

    # Members are sorted so the same set always produces the same bytes.
    members = sorted(s, key=_mixedtypesortkey)
    for encoded in streamencodearray(members):
        yield encoded

157

158

def streamencodemap(d):
    """Encode dictionary to a generator.

    Emits a definite length map header carrying ``len(d)``, followed by
    each key and its value encoded via ``streamencode()``. Entries are
    emitted sorted by ``_mixedtypesortkey()`` of the key.

    Does not support indefinite length dictionaries.
    """
    yield encodelength(MAJOR_TYPE_MAP, len(d))

    # ``items()`` is available on both Python 2 and Python 3 dicts, whereas
    # ``iteritems()`` only exists on Python 2. ``sorted()`` materializes a
    # list either way.
    for key, value in sorted(d.items(),
                             key=lambda x: _mixedtypesortkey(x[0])):
        for chunk in streamencode(key):
            yield chunk
        for chunk in streamencode(value):
            yield chunk

171

172

def streamencodemapfromiter(it):
    """Emit an indefinite length map from an iterable of (key, value) pairs.

    Pairs are encoded in the order ``it`` produces them, between the
    indefinite map start marker and the break.
    """
    yield BEGIN_INDEFINITE_MAP

    for k, v in it:
        for encoded in streamencode(k):
            yield encoded
        for encoded in streamencode(v):
            yield encoded

    yield BREAK

183

184

def streamencodebool(b):
    """Emit the single byte encoding the truthiness of ``b``."""
    # major type 7, simple value 21 (true) or 20 (false).
    if b:
        yield b'\xf5'
    else:
        yield b'\xf4'

187

188

def streamencodenone(v):
    """Emit the single byte encoding null; ``v`` itself is not inspected."""
    # 0xf6 is major type 7 with simple value 22.
    yield b'\xf6'

191

192

# Dispatch table used by streamencode(): maps the exact class of a value
# to the generator function that encodes it.
STREAM_ENCODERS = {
    bytes: streamencodebytestring,   # definite length bytestring
    int: streamencodeint,            # unsigned / negative integer
    pycompat.long: streamencodeint,  # same encoder as ``int``
    list: streamencodearray,         # definite length array
    tuple: streamencodearray,        # definite length array
    dict: streamencodemap,           # definite length map, sorted keys
    set: streamencodeset,            # tagged, sorted array
    bool: streamencodebool,          # true / false
    type(None): streamencodenone,    # null
}

203

204

def streamencode(v):
    """Encode a value to CBOR in a streaming manner.

    The encoder is chosen from ``STREAM_ENCODERS`` by the exact class of
    ``v``; container encoders call back into this function for their
    members, so nested values are encoded recursively.

    Returns a generator of CBOR encoded bytes. An emitted chunk is not
    guaranteed to decode to a complete value or sub-value on its own.

    Encoding is deterministic - unordered collections are sorted.

    Raises ``ValueError`` if no encoder is registered for the class of
    ``v``.
    """
    encoder = STREAM_ENCODERS.get(v.__class__)

    if encoder:
        return encoder(v)

    raise ValueError('do not know how to encode %s' % type(v))

220

221

class CBORDecodeError(Exception):
    """Raised when CBOR input cannot be decoded."""

223

224

# Indexing a bytes object gives an int on Python 3 but a length-1 string
# on Python 2, so pick the matching accessor once at import time.
if sys.version_info.major < 3:
    def _elementtointeger(b, i):
        """Return the integer value of the element of ``b`` at index ``i``."""
        return ord(b[i])
else:
    def _elementtointeger(b, i):
        """Return the integer value of the element of ``b`` at index ``i``."""
        return b[i]

230

231

# Pre-compiled big-endian unsigned integer formats. decodeuint() selects
# one of these from the subtype to read the value following the initial
# byte.
STRUCT_BIG_UBYTE = struct.Struct(r'>B')     # subtype 24: 1 byte
STRUCT_BIG_USHORT = struct.Struct('>H')     # subtype 25: 2 bytes
STRUCT_BIG_ULONG = struct.Struct('>L')      # subtype 26: 4 bytes
STRUCT_BIG_ULONGLONG = struct.Struct('>Q')  # subtype 27: 8 bytes

235

236

# State flags returned as the last element of decodeitem()'s result.
SPECIAL_NONE = 0                         # plain, fully decoded value
SPECIAL_START_INDEFINITE_BYTESTRING = 1  # indefinite bytestring begins
SPECIAL_START_ARRAY = 2                  # array begins; value is its size
SPECIAL_START_MAP = 3                    # map begins; value is its size
SPECIAL_START_SET = 4                    # set begins; value is its size
SPECIAL_INDEFINITE_BREAK = 5             # break byte was seen

242

243

def decodeitem(b, offset=0):
    """Decode at most one CBOR item from buffer ``b`` starting at ``offset``.

    The beginning of a collection (an array, map or set) or of an
    indefinite length bytestring counts as a single item. In those cases
    the returned state flag tells the caller which one was seen, so the
    caller can assemble the full value from the items that follow.

    Each call either returns a decoded item or a hint about how many more
    bytes are required. Calling this repeatedly over a stream of bytes lets
    the caller rebuild the original values.

    Returns a 4-tuple ``(complete, value, count, special)``:

    * ``complete`` is a bool telling whether an item was decoded.
    * ``value`` is the decoded value when ``complete`` is True (the number
      of elements for the start of an array, map or set), otherwise None.
    * ``count`` is an integer number of bytes. If positive, it is the
      number of bytes read. If negative, it is the number of bytes that
      must still be read to decode this item or its next chunk.
    * ``special`` is one of the ``SPECIAL_*`` constants. ``SPECIAL_NONE``
      means a fully decoded simple value (such as an integer or bool).

    Raises ``CBORDecodeError`` for the string major type, for semantic tags
    other than the finite set tag, for a finite set tag not followed by an
    array, and for unsupported special subtypes.
    """
    initial = _elementtointeger(b, offset)
    # From here on ``offset`` points just past the initial byte.
    offset += 1

    majortype = initial >> 5
    subtype = initial & SUBTYPE_MASK

    if majortype == MAJOR_TYPE_UINT:
        complete, value, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # +1 accounts for the initial byte.
        return True, value, readcount + 1, SPECIAL_NONE

    if majortype == MAJOR_TYPE_NEGINT:
        # Stored like a UINT; the real value is the inverse minus 1.
        complete, value, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        return True, -value - 1, readcount + 1, SPECIAL_NONE

    if majortype == MAJOR_TYPE_BYTESTRING:
        # The header is read like a uint to obtain the length, which may
        # be indefinite.
        complete, size, readcount = decodeuint(subtype, b, offset,
                                               allowindefinite=True)

        # The length bytes themselves are not all available yet. This can
        # only happen for a definite length, because the indefinite
        # subtype lives in the initial byte.
        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # No size means an indefinite length bytestring is starting.
        if size is None:
            return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING

        start = offset + readcount
        end = start + size

        # The whole payload is present in the buffer.
        if end <= len(b):
            return True, b[start:end], readcount + size + 1, SPECIAL_NONE

        # The payload is truncated: report the shortfall as a negative
        # count.
        return False, None, len(b) - end, SPECIAL_NONE

    if majortype == MAJOR_TYPE_STRING:
        raise CBORDecodeError('string major type not supported')

    if majortype == MAJOR_TYPE_ARRAY:
        # The header is read like a uint to obtain the element count.
        # Indefinite length arrays are not allowed.
        complete, size, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        return True, size, readcount + 1, SPECIAL_START_ARRAY

    if majortype == MAJOR_TYPE_MAP:
        # The header is read like a uint to obtain the number of entries.
        # Indefinite length maps are not allowed.
        complete, size, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        return True, size, readcount + 1, SPECIAL_START_MAP

    if majortype == MAJOR_TYPE_SEMANTIC:
        # The tag value is read the same way as a uint.
        complete, tagvalue, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # The item "decorated" by the tag follows it. A more general
        # parser would report the tag and leave the following item to the
        # caller. Since only one tag is supported, it is simpler to handle
        # it here and hide the complexity. If more tags are added, tag
        # handling should probably move into the caller.
        if tagvalue != SEMANTIC_TAG_FINITE_SET:
            raise CBORDecodeError('semantic tag %d not allowed' % tagvalue)

        nextoffset = offset + readcount

        # At least the initial byte of the tagged item is needed.
        if nextoffset >= len(b):
            return False, None, -1, SPECIAL_NONE

        complete, size, readcount2, special = decodeitem(b, nextoffset)

        if not complete:
            return False, None, readcount2, SPECIAL_NONE

        if special != SPECIAL_START_ARRAY:
            raise CBORDecodeError('expected array after finite set '
                                  'semantic tag')

        return True, size, readcount + readcount2 + 1, SPECIAL_START_SET

    if majortype == MAJOR_TYPE_SPECIAL:
        # Only specific values of the information field are allowed.
        if subtype == SUBTYPE_FALSE:
            return True, False, 1, SPECIAL_NONE
        if subtype == SUBTYPE_TRUE:
            return True, True, 1, SPECIAL_NONE
        if subtype == SUBTYPE_NULL:
            return True, None, 1, SPECIAL_NONE
        if subtype == SUBTYPE_INDEFINITE:
            return True, None, 1, SPECIAL_INDEFINITE_BREAK

        # If value is 24, subtype is in next byte. That and every other
        # remaining subtype is rejected.
        raise CBORDecodeError('special type %d not allowed' % subtype)

    # Every major type is handled above, so this point is never reached.
    assert False

392

393

def decodeuint(subtype, b, offset=0, allowindefinite=False):
    """Decode an unsigned integer.

    ``subtype`` is the lower 5 bits of the initial byte of a CBOR item.
    ``b`` is a buffer of bytes and ``offset`` is the index of the first
    byte after that initial byte.

    ``allowindefinite`` permits the special indefinite length indicator
    (subtype 31).

    Returns a 3-tuple of (successful, value, count).

    ``successful`` is a bool telling whether decoding completed. ``value``
    is the decoded integer, or None if decoding did not complete or if the
    subtype is 31 and ``allowindefinite`` is True. ``count`` is a number of
    bytes: if positive, the number of additional bytes decoded; if
    negative, the number of additional bytes needed to decode this value.

    Raises ``CBORDecodeError`` for subtype 31 when ``allowindefinite`` is
    False and for the unsupported subtypes 28 through 30.
    """
    # Small values are stored inline in the subtype itself.
    if subtype < 24:
        return True, subtype, 0

    # Indefinite length specifier.
    if subtype == 31:
        if not allowindefinite:
            raise CBORDecodeError('indefinite length uint not allowed here')

        return True, None, 0

    if subtype >= 28:
        raise CBORDecodeError('unsupported subtype on integer type: %d' %
                              subtype)

    # Subtypes 24 through 27 select the width of the integer that follows.
    if subtype == 24:
        unpacker = STRUCT_BIG_UBYTE
    elif subtype == 25:
        unpacker = STRUCT_BIG_USHORT
    elif subtype == 26:
        unpacker = STRUCT_BIG_ULONG
    elif subtype == 27:
        unpacker = STRUCT_BIG_ULONGLONG
    else:
        raise CBORDecodeError('bounds condition checking violation')

    available = len(b) - offset

    # Not enough bytes buffered: report the shortfall as a negative count.
    if available < unpacker.size:
        return False, None, available - unpacker.size

    return True, unpacker.unpack_from(b, offset)[0], unpacker.size

441

442

class bytestringchunk(bytes):
    """A chunk/segment of an indefinite length bytestring.

    Instances behave like ``bytes`` and additionally carry the ``isfirst``
    and ``islast`` attributes, which tell whether the chunk is the first
    or the last one of its indefinite length bytestring.
    """

    def __new__(cls, v, first=False, last=False):
        chunk = bytes.__new__(cls, v)
        chunk.isfirst = first
        chunk.islast = last

        return chunk

456

457

class sansiodecoder(object):

457

class sansiodecoder(object):

458

"""A CBOR decoder that doesn't perform its own I/O.

458

"""A CBOR decoder that doesn't perform its own I/O.

459

460

To use, construct an instance and feed it segments containing

460

To use, construct an instance and feed it segments containing

461

CBOR-encoded bytes via ``decode()``. The return value from ``decode()``

461

CBOR-encoded bytes via ``decode()``. The return value from ``decode()``

462

indicates whether a fully-decoded value is available, how many bytes

462

indicates whether a fully-decoded value is available, how many bytes

463

were consumed, and offers a hint as to how many bytes should be fed

463

were consumed, and offers a hint as to how many bytes should be fed

464

in next time to decode the next value.

464

in next time to decode the next value.

465

466

The decoder assumes it will decode N discrete CBOR values, not just

466

The decoder assumes it will decode N discrete CBOR values, not just

467

a single value. i.e. if the bytestream contains uints packed one after

467

a single value. i.e. if the bytestream contains uints packed one after

468

the other, the decoder will decode them all, rather than just the initial

468

the other, the decoder will decode them all, rather than just the initial

469

one.

469

one.

470

471

When ``decode()`` indicates a value is available, call ``getavailable()``

471

When ``decode()`` indicates a value is available, call ``getavailable()``

472

to return all fully decoded values.

472

to return all fully decoded values.

473

474

``decode()`` can partially decode input. It is up to the caller to keep

474

``decode()`` can partially decode input. It is up to the caller to keep

475

track of what data was consumed and to pass unconsumed data in on the

475

track of what data was consumed and to pass unconsumed data in on the

476

next invocation.

476

next invocation.

477

478

The decoder decodes atomically at the *item* level. See ``decodeitem()``.

478

The decoder decodes atomically at the *item* level. See ``decodeitem()``.

479

If an *item* cannot be fully decoded, the decoder won't record it as

479

If an *item* cannot be fully decoded, the decoder won't record it as

480

partially consumed. Instead, the caller will be instructed to pass in

480

partially consumed. Instead, the caller will be instructed to pass in

481

the initial bytes of this item on the next invocation. This does result

481

the initial bytes of this item on the next invocation. This does result

482

in some redundant parsing. But the overhead should be minimal.

482

in some redundant parsing. But the overhead should be minimal.

483

484

This decoder only supports a subset of CBOR as required by Mercurial.

484

This decoder only supports a subset of CBOR as required by Mercurial.

485

It lacks support for:

485

It lacks support for:

486

487

* Indefinite length arrays

487

* Indefinite length arrays

488

* Indefinite length maps

488

* Indefinite length maps

489

* Use of indefinite length bytestrings as keys or values within

489

* Use of indefinite length bytestrings as keys or values within

490

arrays, maps, or sets.

490

arrays, maps, or sets.

491

* Nested arrays, maps, or sets within sets

491

* Nested arrays, maps, or sets within sets

492

* Any semantic tag that isn't a mathematical finite set

492

* Any semantic tag that isn't a mathematical finite set

493

* Floating point numbers

493

* Floating point numbers

494

* Undefined special value

494

* Undefined special value

495

496

CBOR types are decoded to Python types as follows:

496

CBOR types are decoded to Python types as follows:

497

498

uint -> int

498

uint -> int

499

negint -> int

499

negint -> int

500

bytestring -> bytes

500

bytestring -> bytes

501

map -> dict

501

map -> dict

502

array -> list

502

array -> list

503

True -> bool

503

True -> bool

504

False -> bool

504

False -> bool

505

null -> None

505

null -> None

506

indefinite length bytestring chunk -> [bytestringchunk]

506

indefinite length bytestring chunk -> [bytestringchunk]

507

508

The only non-obvious mapping here is an indefinite length bytestring

508

The only non-obvious mapping here is an indefinite length bytestring

509

to the ``bytestringchunk`` type. This is to facilitate streaming

509

to the ``bytestringchunk`` type. This is to facilitate streaming

510

indefinite length bytestrings out of the decoder and to differentiate

510

indefinite length bytestrings out of the decoder and to differentiate

511

a regular bytestring from an indefinite length bytestring.

511

a regular bytestring from an indefinite length bytestring.

512

"""

512

"""

513

514

_STATE_NONE = 0

514

_STATE_NONE = 0

515

_STATE_WANT_MAP_KEY = 1

515

_STATE_WANT_MAP_KEY = 1

516

_STATE_WANT_MAP_VALUE = 2

516

_STATE_WANT_MAP_VALUE = 2

517

_STATE_WANT_ARRAY_VALUE = 3

517

_STATE_WANT_ARRAY_VALUE = 3

518

_STATE_WANT_SET_VALUE = 4

518

_STATE_WANT_SET_VALUE = 4

519

_STATE_WANT_BYTESTRING_CHUNK_FIRST = 5

519

_STATE_WANT_BYTESTRING_CHUNK_FIRST = 5

520

_STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6

520

_STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6

521

522

def __init__(self):

522

def __init__(self):

523

# TODO add support for limiting size of bytestrings

523

# TODO add support for limiting size of bytestrings

524

# TODO add support for limiting number of keys / values in collections

524

# TODO add support for limiting number of keys / values in collections

525

# TODO add support for limiting size of buffered partial values

525

# TODO add support for limiting size of buffered partial values

526

527

self.decodedbytecount = 0

527

self.decodedbytecount = 0

528

529

self._state = self._STATE_NONE

529

self._state = self._STATE_NONE

530

531

# Stack of active nested collections. Each entry is a dict describing

531

# Stack of active nested collections. Each entry is a dict describing

532

# the collection.

532

# the collection.

533

self._collectionstack = []

533

self._collectionstack = []

534

535

# Fully decoded key to use for the current map.

535

# Fully decoded key to use for the current map.

536

self._currentmapkey = None

536

self._currentmapkey = None

537

538

# Fully decoded values available for retrieval.

538

# Fully decoded values available for retrieval.

539

self._decodedvalues = []

539

self._decodedvalues = []

540

541

@property

541

@property

542

def inprogress(self):

542

def inprogress(self):

543

"""Whether the decoder has partially decoded a value."""

543

"""Whether the decoder has partially decoded a value."""

544

return self._state != self._STATE_NONE

544

return self._state != self._STATE_NONE

545

546

def decode(self, b, offset=0):

546

def decode(self, b, offset=0):

547

"""Attempt to decode bytes from an input buffer.

547

"""Attempt to decode bytes from an input buffer.

548

549

``b`` is a collection of bytes and ``offset`` is the byte

549

``b`` is a collection of bytes and ``offset`` is the byte

550

offset within that buffer from which to begin reading data.

550

offset within that buffer from which to begin reading data.

551

552

``b`` must support ``len()`` and accessing bytes slices via

552

``b`` must support ``len()`` and accessing bytes slices via

553

``__slice__``. Typically ``bytes`` instances are used.

553

``__slice__``. Typically ``bytes`` instances are used.

554

555

Returns a tuple with the following fields:

555

Returns a tuple with the following fields:

556

557

* Bool indicating whether values are available for retrieval.

557

* Bool indicating whether values are available for retrieval.

558

* Integer indicating the number of bytes that were fully consumed,

558

* Integer indicating the number of bytes that were fully consumed,

559

starting from ``offset``.

559

starting from ``offset``.

560

* Integer indicating the number of bytes that are desired for the

560

* Integer indicating the number of bytes that are desired for the

561

next call in order to decode an item.

561

next call in order to decode an item.

562

"""

562

"""

563

if not b:

563

if not b:

564

return bool(self._decodedvalues), 0, 0

564

return bool(self._decodedvalues), 0, 0

565

566

initialoffset = offset

566

initialoffset = offset

567

568

# We could easily split the body of this loop into a function. But

568

# We could easily split the body of this loop into a function. But

569

# Python performance is sensitive to function calls and collections

569

# Python performance is sensitive to function calls and collections

570

# are composed of many items. So leaving as a while loop could help

570

# are composed of many items. So leaving as a while loop could help

571

# with performance. One thing that may not help is the use of

571

# with performance. One thing that may not help is the use of

572

# if..elif versus a lookup/dispatch table. There may be value

572

# if..elif versus a lookup/dispatch table. There may be value

573

# in switching that.

573

# in switching that.

574

while offset < len(b):

574

while offset < len(b):

575

# Attempt to decode an item. This could be a whole value or a

575

# Attempt to decode an item. This could be a whole value or a

576

# special value indicating an event, such as start or end of a

576

# special value indicating an event, such as start or end of a

577

# collection or indefinite length type.

577

# collection or indefinite length type.

578

complete, value, readcount, special = decodeitem(b, offset)

578

complete, value, readcount, special = decodeitem(b, offset)

579

580

if readcount > 0:

580

if readcount > 0:

581

self.decodedbytecount += readcount

581

self.decodedbytecount += readcount

582

583

if not complete:

583

if not complete:

584

assert readcount < 0

584

assert readcount < 0

585

return (

585

return (

586

bool(self._decodedvalues),

586

bool(self._decodedvalues),

587

offset - initialoffset,

587

offset - initialoffset,

588

-readcount,

588

-readcount,

589

)

589

)

590

591

offset += readcount

591

offset += readcount

592

593

# No nested state. We either have a full value or beginning of a

593

# No nested state. We either have a full value or beginning of a

594

# complex value to deal with.

594

# complex value to deal with.

595

if self._state == self._STATE_NONE:

595

if self._state == self._STATE_NONE:

596

# A normal value.

596

# A normal value.

597

if special == SPECIAL_NONE:

597

if special == SPECIAL_NONE:

598

self._decodedvalues.append(value)

598

self._decodedvalues.append(value)

599

600

elif special == SPECIAL_START_ARRAY:

600

elif special == SPECIAL_START_ARRAY:

601

self._collectionstack.append({

601

self._collectionstack.append({

602

'remaining': value,

602

'remaining': value,

603

'v': [],

603

'v': [],

604

})

604

})

605

self._state = self._STATE_WANT_ARRAY_VALUE

605

self._state = self._STATE_WANT_ARRAY_VALUE

606

607

elif special == SPECIAL_START_MAP:

607

elif special == SPECIAL_START_MAP:

608

self._collectionstack.append({

608

self._collectionstack.append({

609

'remaining': value,

609

'remaining': value,

610

'v': {},

610

'v': {},

611

})

611

})

612

self._state = self._STATE_WANT_MAP_KEY

612

self._state = self._STATE_WANT_MAP_KEY

613

614

elif special == SPECIAL_START_SET:

614

elif special == SPECIAL_START_SET:

615

self._collectionstack.append({

615

self._collectionstack.append({

616

'remaining': value,

616

'remaining': value,

617

'v': set(),

617

'v': set(),

618

})

618

})

619

self._state = self._STATE_WANT_SET_VALUE

619

self._state = self._STATE_WANT_SET_VALUE

620

621

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

621

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

622

self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST

622

self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST

623

624

else:

624

else:

625

raise CBORDecodeError('unhandled special state: %d' %

625

raise CBORDecodeError('unhandled special state: %d' %

626

special)

626

special)

627

628

# This value becomes an element of the current array.

628

# This value becomes an element of the current array.

629

elif self._state == self._STATE_WANT_ARRAY_VALUE:

629

elif self._state == self._STATE_WANT_ARRAY_VALUE:

630

# Simple values get appended.

630

# Simple values get appended.

631

if special == SPECIAL_NONE:

631

if special == SPECIAL_NONE:

632

c = self._collectionstack[-1]

632

c = self._collectionstack[-1]

633

c['v'].append(value)

633

c['v'].append(value)

634

c['remaining'] -= 1

634

c['remaining'] -= 1

635

636

# self._state doesn't need changed.

636

# self._state doesn't need changed.

637

638

# An array nested within an array.

638

# An array nested within an array.

639

elif special == SPECIAL_START_ARRAY:

639

elif special == SPECIAL_START_ARRAY:

640

lastc = self._collectionstack[-1]

640

lastc = self._collectionstack[-1]

641

newvalue = []

641

newvalue = []

642

643

lastc['v'].append(newvalue)

643

lastc['v'].append(newvalue)

644

lastc['remaining'] -= 1

644

lastc['remaining'] -= 1

645

646

self._collectionstack.append({

646

self._collectionstack.append({

647

'remaining': value,

647

'remaining': value,

648

'v': newvalue,

648

'v': newvalue,

649

})

649

})

650

651

# self._state doesn't need changed.

651

# self._state doesn't need changed.

652

653

# A map nested within an array.

653

# A map nested within an array.

654

elif special == SPECIAL_START_MAP:

654

elif special == SPECIAL_START_MAP:

655

lastc = self._collectionstack[-1]

655

lastc = self._collectionstack[-1]

656

newvalue = {}

656

newvalue = {}

657

658

lastc['v'].append(newvalue)

658

lastc['v'].append(newvalue)

659

lastc['remaining'] -= 1

659

lastc['remaining'] -= 1

660

661

self._collectionstack.append({

661

self._collectionstack.append({

662

'remaining': value,

662

'remaining': value,

663

'v': newvalue

663

'v': newvalue

664

})

664

})

665

666

self._state = self._STATE_WANT_MAP_KEY

666

self._state = self._STATE_WANT_MAP_KEY

667

668

elif special == SPECIAL_START_SET:

668

elif special == SPECIAL_START_SET:

669

lastc = self._collectionstack[-1]

669

lastc = self._collectionstack[-1]

670

newvalue = set()

670

newvalue = set()

671

672

lastc['v'].append(newvalue)

672

lastc['v'].append(newvalue)

673

lastc['remaining'] -= 1

673

lastc['remaining'] -= 1

674

675

self._collectionstack.append({

675

self._collectionstack.append({

676

'remaining': value,

676

'remaining': value,

677

'v': newvalue,

677

'v': newvalue,

678

})

678

})

679

680

self._state = self._STATE_WANT_SET_VALUE

680

self._state = self._STATE_WANT_SET_VALUE

681

682

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

682

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

683

raise CBORDecodeError('indefinite length bytestrings '

683

raise CBORDecodeError('indefinite length bytestrings '

684

'not allowed as array values')

684

'not allowed as array values')

685

686

else:

686

else:

687

raise CBORDecodeError('unhandled special item when '

687

raise CBORDecodeError('unhandled special item when '

688

'expecting array value: %d' % special)

688

'expecting array value: %d' % special)

689

690

# This value becomes the key of the current map instance.

690

# This value becomes the key of the current map instance.

691

elif self._state == self._STATE_WANT_MAP_KEY:

691

elif self._state == self._STATE_WANT_MAP_KEY:

692

if special == SPECIAL_NONE:

692

if special == SPECIAL_NONE:

693

self._currentmapkey = value

693

self._currentmapkey = value

694

self._state = self._STATE_WANT_MAP_VALUE

694

self._state = self._STATE_WANT_MAP_VALUE

695

696

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

696

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

697

raise CBORDecodeError('indefinite length bytestrings '

697

raise CBORDecodeError('indefinite length bytestrings '

698

'not allowed as map keys')

698

'not allowed as map keys')

699

700

elif special in (SPECIAL_START_ARRAY, SPECIAL_START_MAP,

700

elif special in (SPECIAL_START_ARRAY, SPECIAL_START_MAP,

701

SPECIAL_START_SET):

701

SPECIAL_START_SET):

702

raise CBORDecodeError('collections not supported as map '

702

raise CBORDecodeError('collections not supported as map '

703

'keys')

703

'keys')

704

705

# We do not allow special values to be used as map keys.

705

# We do not allow special values to be used as map keys.

706

else:

706

else:

707

raise CBORDecodeError('unhandled special item when '

707

raise CBORDecodeError('unhandled special item when '

708

'expecting map key: %d' % special)

708

'expecting map key: %d' % special)

709

710

# This value becomes the value of the current map key.

710

# This value becomes the value of the current map key.

711

elif self._state == self._STATE_WANT_MAP_VALUE:

711

elif self._state == self._STATE_WANT_MAP_VALUE:

712

# Simple values simply get inserted into the map.

712

# Simple values simply get inserted into the map.

713

if special == SPECIAL_NONE:

713

if special == SPECIAL_NONE:

714

lastc = self._collectionstack[-1]

714

lastc = self._collectionstack[-1]

715

lastc['v'][self._currentmapkey] = value

715

lastc['v'][self._currentmapkey] = value

716

lastc['remaining'] -= 1

716

lastc['remaining'] -= 1

717

718

self._state = self._STATE_WANT_MAP_KEY

718

self._state = self._STATE_WANT_MAP_KEY

719

720

# A new array is used as the map value.

720

# A new array is used as the map value.

721

elif special == SPECIAL_START_ARRAY:

721

elif special == SPECIAL_START_ARRAY:

722

lastc = self._collectionstack[-1]

722

lastc = self._collectionstack[-1]

723

newvalue = []

723

newvalue = []

724

725

lastc['v'][self._currentmapkey] = newvalue

725

lastc['v'][self._currentmapkey] = newvalue

726

lastc['remaining'] -= 1

726

lastc['remaining'] -= 1

727

728

self._collectionstack.append({

728

self._collectionstack.append({

729

'remaining': value,

729

'remaining': value,

730

'v': newvalue,

730

'v': newvalue,

731

})

731

})

732

733

self._state = self._STATE_WANT_ARRAY_VALUE

733

self._state = self._STATE_WANT_ARRAY_VALUE

734

735

# A new map is used as the map value.

735

# A new map is used as the map value.

736

elif special == SPECIAL_START_MAP:

736

elif special == SPECIAL_START_MAP:

737

lastc = self._collectionstack[-1]

737

lastc = self._collectionstack[-1]

738

newvalue = {}

738

newvalue = {}

739

740

lastc['v'][self._currentmapkey] = newvalue

740

lastc['v'][self._currentmapkey] = newvalue

741

lastc['remaining'] -= 1

741

lastc['remaining'] -= 1

742

743

self._collectionstack.append({

743

self._collectionstack.append({

744

'remaining': value,

744

'remaining': value,

745

'v': newvalue,

745

'v': newvalue,

746

})

746

})

747

748

self._state = self._STATE_WANT_MAP_KEY

748

self._state = self._STATE_WANT_MAP_KEY

749

750

# A new set is used as the map value.

750

# A new set is used as the map value.

751

elif special == SPECIAL_START_SET:

751

elif special == SPECIAL_START_SET:

752

lastc = self._collectionstack[-1]

752

lastc = self._collectionstack[-1]

753

newvalue = set()

753

newvalue = set()

754

755

lastc['v'][self._currentmapkey] = newvalue

755

lastc['v'][self._currentmapkey] = newvalue

756

lastc['remaining'] -= 1

756

lastc['remaining'] -= 1

757

758

self._collectionstack.append({

758

self._collectionstack.append({

759

'remaining': value,

759

'remaining': value,

760

'v': newvalue,

760

'v': newvalue,

761

})

761

})

762

763

self._state = self._STATE_WANT_SET_VALUE

763

self._state = self._STATE_WANT_SET_VALUE

764

765

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

765

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

766

raise CBORDecodeError('indefinite length bytestrings not '

766

raise CBORDecodeError('indefinite length bytestrings not '

767

'allowed as map values')

767

'allowed as map values')

768

769

else:

769

else:

770

raise CBORDecodeError('unhandled special item when '

770

raise CBORDecodeError('unhandled special item when '

771

'expecting map value: %d' % special)

771

'expecting map value: %d' % special)

772

773

self._currentmapkey = None

773

self._currentmapkey = None

774

775

# This value is added to the current set.

775

# This value is added to the current set.

776

elif self._state == self._STATE_WANT_SET_VALUE:

776

elif self._state == self._STATE_WANT_SET_VALUE:

777

if special == SPECIAL_NONE:

777

if special == SPECIAL_NONE:

778

lastc = self._collectionstack[-1]

778

lastc = self._collectionstack[-1]

779

lastc['v'].add(value)

779

lastc['v'].add(value)

780

lastc['remaining'] -= 1

780

lastc['remaining'] -= 1

781

782

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

782

elif special == SPECIAL_START_INDEFINITE_BYTESTRING:

783

raise CBORDecodeError('indefinite length bytestrings not '

783

raise CBORDecodeError('indefinite length bytestrings not '

784

'allowed as set values')

784

'allowed as set values')

785

786

elif special in (SPECIAL_START_ARRAY,

786

elif special in (SPECIAL_START_ARRAY,

787

SPECIAL_START_MAP,

787

SPECIAL_START_MAP,

788

SPECIAL_START_SET):

788

SPECIAL_START_SET):

789

raise CBORDecodeError('collections not allowed as set '

789

raise CBORDecodeError('collections not allowed as set '

790

'values')

790

'values')

791

792

# We don't allow non-trivial types to exist as set values.

792

# We don't allow non-trivial types to exist as set values.

793

else:

793

else:

794

raise CBORDecodeError('unhandled special item when '

794

raise CBORDecodeError('unhandled special item when '

795

'expecting set value: %d' % special)

795

'expecting set value: %d' % special)

796

797

# This value represents the first chunk in an indefinite length

797

# This value represents the first chunk in an indefinite length

798

# bytestring.

798

# bytestring.

799

elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:

799

elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:

800

# We received a full chunk.

800

# We received a full chunk.

801

if special == SPECIAL_NONE:

801

if special == SPECIAL_NONE:

802

self._decodedvalues.append(bytestringchunk(value,

802

self._decodedvalues.append(bytestringchunk(value,

803

first=True))

803

first=True))

804

805

self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT

805

self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT

806

807

# The end of stream marker. This means it is an empty

807

# The end of stream marker. This means it is an empty

808

# indefinite length bytestring.

808

# indefinite length bytestring.

809

elif special == SPECIAL_INDEFINITE_BREAK:

809

elif special == SPECIAL_INDEFINITE_BREAK:

810

# We /could/ convert this to a b''. But we want to preserve

810

# We /could/ convert this to a b''. But we want to preserve

811

# the nature of the underlying data so consumers expecting

811

# the nature of the underlying data so consumers expecting

812

# an indefinite length bytestring get one.

812

# an indefinite length bytestring get one.

813

self._decodedvalues.append(bytestringchunk(b'',

813

self._decodedvalues.append(bytestringchunk(b'',

814

first=True,

814

first=True,

815

last=True))

815

last=True))

816

817

# Since indefinite length bytestrings can't be used in

817

# Since indefinite length bytestrings can't be used in

818

# collections, we must be at the root level.

818

# collections, we must be at the root level.

819

assert not self._collectionstack

819

assert not self._collectionstack

820

self._state = self._STATE_NONE

820

self._state = self._STATE_NONE

821

822

else:

822

else:

823

raise CBORDecodeError('unexpected special value when '

823

raise CBORDecodeError('unexpected special value when '

824

'expecting bytestring chunk: %d' %

824

'expecting bytestring chunk: %d' %

825

special)

825

special)

826

827

# This value represents the non-initial chunk in an indefinite

827

# This value represents the non-initial chunk in an indefinite

828

# length bytestring.

828

# length bytestring.

829

elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:

829

elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:

830

# We received a full chunk.

830

# We received a full chunk.

831

if special == SPECIAL_NONE:

831

if special == SPECIAL_NONE:

832

self._decodedvalues.append(bytestringchunk(value))

832

self._decodedvalues.append(bytestringchunk(value))

833

834

# The end of stream marker.

834

# The end of stream marker.

835

elif special == SPECIAL_INDEFINITE_BREAK:

835

elif special == SPECIAL_INDEFINITE_BREAK:

836

self._decodedvalues.append(bytestringchunk(b'', last=True))

836

self._decodedvalues.append(bytestringchunk(b'', last=True))

837

838

# Since indefinite length bytestrings can't be used in

838

# Since indefinite length bytestrings can't be used in

839

# collections, we must be at the root level.

839

# collections, we must be at the root level.

840

assert not self._collectionstack

840

assert not self._collectionstack

841

self._state = self._STATE_NONE

841

self._state = self._STATE_NONE

842

843

else:

843

else:

844

raise CBORDecodeError('unexpected special value when '

844

raise CBORDecodeError('unexpected special value when '

845

'expecting bytestring chunk: %d' %

845

'expecting bytestring chunk: %d' %

846

special)

846

special)

847

848

else:

848

else:

849

raise CBORDecodeError('unhandled decoder state: %d' %

849

raise CBORDecodeError('unhandled decoder state: %d' %

850

self._state)

850

self._state)

851

852

# We could have just added the final value in a collection. End

852

# We could have just added the final value in a collection. End

853

# all complete collections at the top of the stack.

853

# all complete collections at the top of the stack.

854

while True:

854

while True:

855

# Bail if we're not waiting on a new collection item.

855

# Bail if we're not waiting on a new collection item.

856

if self._state not in (self._STATE_WANT_ARRAY_VALUE,

856

if self._state not in (self._STATE_WANT_ARRAY_VALUE,

857

self._STATE_WANT_MAP_KEY,

857

self._STATE_WANT_MAP_KEY,

858

self._STATE_WANT_SET_VALUE):

858

self._STATE_WANT_SET_VALUE):

859

break

859

break

860

861

# Or we are expecting more items for this collection.

861

# Or we are expecting more items for this collection.

862

lastc = self._collectionstack[-1]

862

lastc = self._collectionstack[-1]

863

864

if lastc['remaining']:

864

if lastc['remaining']:

865

break

865

break

866

867

# The collection at the top of the stack is complete.

867

# The collection at the top of the stack is complete.

868

869

# Discard it, as it isn't needed for future items.

869

# Discard it, as it isn't needed for future items.

870

self._collectionstack.pop()

870

self._collectionstack.pop()

871

872

# If this is a nested collection, we don't emit it, since it

872

# If this is a nested collection, we don't emit it, since it

873

# will be emitted by its parent collection. But we do need to

873

# will be emitted by its parent collection. But we do need to

874

# update state to reflect what the new top-most collection

874

# update state to reflect what the new top-most collection

875

# on the stack is.

875

# on the stack is.

876

if self._collectionstack:

876

if self._collectionstack:

877

self._state = {

877

self._state = {

878

list: self._STATE_WANT_ARRAY_VALUE,

878

list: self._STATE_WANT_ARRAY_VALUE,

879

dict: self._STATE_WANT_MAP_KEY,

879

dict: self._STATE_WANT_MAP_KEY,

880

set: self._STATE_WANT_SET_VALUE,

880

set: self._STATE_WANT_SET_VALUE,

881

}[type(self._collectionstack[-1]['v'])]

881

}[type(self._collectionstack[-1]['v'])]

882

883

# If this is the root collection, emit it.

883

# If this is the root collection, emit it.

884

else:

884

else:

885

self._decodedvalues.append(lastc['v'])

885

self._decodedvalues.append(lastc['v'])

886

self._state = self._STATE_NONE

886

self._state = self._STATE_NONE

887

888

return (

888

return (

889

bool(self._decodedvalues),

889

bool(self._decodedvalues),

890

offset - initialoffset,

890

offset - initialoffset,

891

0,

891

0,

892

)

892

)

893

894

def getavailable(self):

894

def getavailable(self):

895

"""Returns an iterator over fully decoded values.

895

"""Returns an iterator over fully decoded values.

896

897

Once values are retrieved, they won't be available on the next call.

897

Once values are retrieved, they won't be available on the next call.

898

"""

898

"""

899

900

l = list(self._decodedvalues)

900

l = list(self._decodedvalues)

901

self._decodedvalues = []

901

self._decodedvalues = []

902

return l

902

return l

903

904

class bufferingdecoder(object):

904

class bufferingdecoder(object):

905

"""A CBOR decoder that buffers undecoded input.

905

"""A CBOR decoder that buffers undecoded input.

906

907

This is a glorified wrapper around ``sansiodecoder`` that adds a buffering

907

This is a glorified wrapper around ``sansiodecoder`` that adds a buffering

908

layer. All input that isn't consumed by ``sansiodecoder`` will be buffered

908

layer. All input that isn't consumed by ``sansiodecoder`` will be buffered

909

and concatenated with any new input that arrives later.

909

and concatenated with any new input that arrives later.

910

911

TODO consider adding limits as to the maximum amount of data that can

911

TODO consider adding limits as to the maximum amount of data that can

912

be buffered.

912

be buffered.

913

"""

913

"""

914

def __init__(self):

914

def __init__(self):

915

self._decoder = sansiodecoder()

915

self._decoder = sansiodecoder()

916

self._leftover = None

916

self._chunks = []

917

self._wanted = 0

917

918

def decode(self, b):

919

def decode(self, b):

919

"""Attempt to decode bytes to CBOR values.

920

"""Attempt to decode bytes to CBOR values.

920

921

Returns a tuple with the following fields:

922

Returns a tuple with the following fields:

922

923

* Bool indicating whether new values are available for retrieval.

924

* Bool indicating whether new values are available for retrieval.

924

* Integer number of bytes decoded from the new input.

925

* Integer number of bytes decoded from the new input.

925

* Integer number of bytes wanted to decode the next value.

926

* Integer number of bytes wanted to decode the next value.

926

"""

927

"""

928

# Our strategy for buffering is to aggregate the incoming chunks in a

929

# list until we've received enough data to decode the next item.

930

# This is slightly more complicated than using an ``io.BytesIO``

931

# or continuously concatenating incoming data. However, because it

932

# isn't constantly reallocating backing memory for a growing buffer,

933

# it prevents excessive memory thrashing and is significantly faster,

934

# especially in cases where the percentage of input chunks that don't

935

# decode into a full item is high.

927

936

928

if self._leftover:

937

if self._chunks:

929

oldlen = len(self._leftover)

938

# A previous call said we needed N bytes to decode the next item.

930

b = self._leftover + b

939

# But this call doesn't provide enough data. We buffer the incoming

931

self._leftover = None

940

# chunk without attempting to decode.

941

if len(b) < self._wanted:

942

self._chunks.append(b)

943

self._wanted -= len(b)

944

return False, 0, self._wanted

945

946

# Else we may have enough data to decode the next item. Aggregate

947

# old data with new and reset the buffer.

948

newlen = len(b)

949

self._chunks.append(b)

950

b = b''.join(self._chunks)

951

self._chunks = []

952

oldlen = len(b) - newlen

953

932

else:

954

else:

933

b = b

934

oldlen = 0

955

oldlen = 0

935

956

936

available, readcount, wanted = self._decoder.decode(b)

957

available, readcount, wanted = self._decoder.decode(b)

958

self._wanted = wanted

937

959

938

if readcount < len(b):

960

if readcount < len(b):

939

self._leftover = b[readcount:]

961

self._chunks.append(b[readcount:])

940

962

941

return available, readcount - oldlen, wanted

963

return available, readcount - oldlen, wanted

942

964

943

def getavailable(self):

965

def getavailable(self):

944

return self._decoder.getavailable()

966

return self._decoder.getavailable()

945

967

946

def decodeall(b):

968

def decodeall(b):

947

"""Decode all CBOR items present in an iterable of bytes.

969

"""Decode all CBOR items present in an iterable of bytes.

948

970

949

In addition to regular decode errors, raises CBORDecodeError if the

971

In addition to regular decode errors, raises CBORDecodeError if the

950

entirety of the passed buffer does not fully decode to complete CBOR

972

entirety of the passed buffer does not fully decode to complete CBOR

951

values. This includes failure to decode any value, incomplete collection

973

values. This includes failure to decode any value, incomplete collection

952

types, incomplete indefinite length items, and extra data at the end of

974

types, incomplete indefinite length items, and extra data at the end of

953

the buffer.

975

the buffer.

954

"""

976

"""

955

if not b:

977

if not b:

956

return []

978

return []

957

979

958

decoder = sansiodecoder()

980

decoder = sansiodecoder()

959

981

960

havevalues, readcount, wantbytes = decoder.decode(b)

982

havevalues, readcount, wantbytes = decoder.decode(b)

961

983

962

if readcount != len(b):

984

if readcount != len(b):

963

raise CBORDecodeError('input data not fully consumed')

985

raise CBORDecodeError('input data not fully consumed')

964

986

965

if decoder.inprogress:

987

if decoder.inprogress:

966

raise CBORDecodeError('input data not complete')

988

raise CBORDecodeError('input data not complete')

967

989

968

return decoder.getavailable()

990

return decoder.getavailable()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # cborutil.py - CBOR extensions
             #
             # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import struct
             import sys
             from .. import pycompat
             # Very short version of RFC 7049...
             #
             # Each item begins with a byte. The 3 high bits of that byte denote the
             # "major type." The lower 5 bits denote the "subtype." Each major type
             # has its own encoding mechanism.
             #
             # Most types have lengths. However, bytestring, string, array, and map
             # can be indefinite length. These are denoted by a subtype with value 31.
             # Sub-components of those types then come afterwards and are terminated
             # by a "break" byte.
             # Major types: the value held in the 3 high bits of an item's initial byte.
             MAJOR_TYPE_UINT = 0
             MAJOR_TYPE_NEGINT = 1
             MAJOR_TYPE_BYTESTRING = 2
             MAJOR_TYPE_STRING = 3
             MAJOR_TYPE_ARRAY = 4
             MAJOR_TYPE_MAP = 5
             MAJOR_TYPE_SEMANTIC = 6
             MAJOR_TYPE_SPECIAL = 7
             # Mask selecting the low 5 bits (the "subtype") of the initial byte.
             SUBTYPE_MASK = 0b00011111
             # Subtypes that decodeitem() recognises under MAJOR_TYPE_SPECIAL.
             SUBTYPE_FALSE = 20
             SUBTYPE_TRUE = 21
             SUBTYPE_NULL = 22
             # Floating point subtypes. They are named here, but decodeitem() raises
             # CBORDecodeError for any special subtype other than false, true, null
             # and the indefinite break.
             SUBTYPE_HALF_FLOAT = 25
             SUBTYPE_SINGLE_FLOAT = 26
             SUBTYPE_DOUBLE_FLOAT = 27
             # Subtype marking an indefinite length item (and, with
             # MAJOR_TYPE_SPECIAL, the "break" that terminates one).
             SUBTYPE_INDEFINITE = 31
             # Semantic tag used for finite sets; it is followed by an array holding
             # the set's members.
             SEMANTIC_TAG_FINITE_SET = 258
             # Indefinite types begin with their major type ORd with information value 31.
             BEGIN_INDEFINITE_BYTESTRING = struct.pack(
                 r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE)
             BEGIN_INDEFINITE_ARRAY = struct.pack(
                 r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE)
             BEGIN_INDEFINITE_MAP = struct.pack(
                 r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE)
             # Pre-compiled big-endian packers used by encodelength(): the initial
             # byte alone, or the initial byte followed by a 1, 2, 4 or 8 byte
             # unsigned integer.
             ENCODED_LENGTH_1 = struct.Struct(r'>B')
             ENCODED_LENGTH_2 = struct.Struct(r'>BB')
             ENCODED_LENGTH_3 = struct.Struct(r'>BH')
             ENCODED_LENGTH_4 = struct.Struct(r'>BL')
             ENCODED_LENGTH_5 = struct.Struct(r'>BQ')
             # The break ends an indefinite length item.
             BREAK = b'\xff'
             # Integer value of the BREAK byte (0xff).
             BREAK_INT = 255
             def encodelength(majortype, length):
                 """Return the header bytes for ``majortype`` carrying ``length``.

                 The major type occupies the 3 high bits of the initial byte. Values
                 below 24 are stored directly in the low 5 bits; larger values put
                 24, 25, 26 or 27 there and append the value as a 1, 2, 4 or 8 byte
                 big-endian unsigned integer.
                 """
                 high = majortype << 5
                 # Small values live inline in the initial byte.
                 if length < 24:
                     return ENCODED_LENGTH_1.pack(high | length)
                 # Everything else is the initial byte plus a fixed-width integer.
                 if length < 256:
                     return ENCODED_LENGTH_2.pack(high | 24, length)
                 if length < 65536:
                     return ENCODED_LENGTH_3.pack(high | 25, length)
                 if length < 4294967296:
                     return ENCODED_LENGTH_4.pack(high | 26, length)
                 return ENCODED_LENGTH_5.pack(high | 27, length)
             def streamencodebytestring(v):
                 """Emit a definite length bytestring: its length header, then ``v``."""
                 header = encodelength(MAJOR_TYPE_BYTESTRING, len(v))
                 yield header
                 yield v
             def streamencodebytestringfromiter(it):
                 """Encode an iterable of byte chunks as an indefinite length bytestring.

                 Emits the indefinite bytestring marker, then every element of ``it``
                 as its own definite length bytestring (header followed by data), and
                 finally the break byte.
                 """
                 yield BEGIN_INDEFINITE_BYTESTRING
                 for piece in it:
                     header = encodelength(MAJOR_TYPE_BYTESTRING, len(piece))
                     yield header
                     yield piece
                 yield BREAK
             def streamencodeindefinitebytestring(source, chunksize=65536):
                 """Given a large source buffer, emit as an indefinite length bytestring.

                 This is a generator of chunks constituting the encoded CBOR data.

                 ``source`` must support ``len()`` and slicing. It is split into
                 pieces of at most ``chunksize`` bytes, each emitted as a definite
                 length bytestring between the indefinite marker and the break byte.
                 An empty ``source`` still produces a single zero-length chunk.

                 Raises ``ValueError`` (on first iteration) if ``chunksize`` is not
                 positive.
                 """
                 # A non-positive chunk size never advances the offset and would make
                 # this generator emit empty chunks forever.
                 if chunksize <= 0:
                     raise ValueError('chunksize must be a positive integer')
                 yield BEGIN_INDEFINITE_BYTESTRING
                 i = 0
                 l = len(source)
                 while True:
                     chunk = source[i:i + chunksize]
                     i += len(chunk)
                     yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
                     yield chunk
                     # Checked after emitting so that an empty source yields one
                     # (empty) chunk rather than none.
                     if i >= l:
                         break
                 yield BREAK
             def streamencodeint(v):
                 """Encode an integer as a CBOR uint or negint.

                 Only values in the range [-2**64, 2**64) are representable; anything
                 else raises ``ValueError`` when the generator is first advanced.
                 """
                 if v < -18446744073709551616 or v >= 18446744073709551616:
                     raise ValueError('big integers not supported')
                 if v < 0:
                     # Negative integers are stored as (-1 - v).
                     yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
                 else:
                     yield encodelength(MAJOR_TYPE_UINT, v)
             def streamencodearray(l):
                 """Encode a sized iterable as a definite length array.

                 Emits the array header carrying ``len(l)``, followed by the encoding
                 of each element in iteration order.
                 """
                 header = encodelength(MAJOR_TYPE_ARRAY, len(l))
                 yield header
                 for item in l:
                     for piece in streamencode(item):
                         yield piece
             def streamencodearrayfromiter(it):
                 """Encode the items of an iterable as an indefinite length array.

                 Emits the indefinite array marker, the encoding of every item, and
                 then the break byte.
                 """
                 yield BEGIN_INDEFINITE_ARRAY
                 for item in it:
                     for piece in streamencode(item):
                         yield piece
                 yield BREAK
             def _mixedtypesortkey(v):
                 return type(v).__name__, v
             def streamencodeset(s):
                 """Encode a set as semantic tag 258 followed by a sorted array."""
                 # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
                 # semantic tag 258 for finite sets.
                 yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)
                 # Sort the members so the encoding is deterministic.
                 members = sorted(s, key=_mixedtypesortkey)
                 for piece in streamencodearray(members):
                     yield piece
             def streamencodemap(d):
                 """Encode dictionary to a generator.

                 Does not support indefinite length dictionaries.

                 Emits the map header carrying ``len(d)``, then each key followed by
                 its value. Entries are ordered by ``_mixedtypesortkey`` of the key so
                 the output is deterministic.
                 """
                 yield encodelength(MAJOR_TYPE_MAP, len(d))
                 # ``items()`` is available on both Python 2 and Python 3 dicts,
                 # whereas ``iteritems()`` only exists on Python 2. sorted()
                 # materializes the pairs either way.
                 for key, value in sorted(d.items(),
                                          key=lambda x: _mixedtypesortkey(x[0])):
                     for chunk in streamencode(key):
                         yield chunk
                     for chunk in streamencode(value):
                         yield chunk
             def streamencodemapfromiter(it):
                 """Encode an iterable of (key, value) pairs as an indefinite length map.

                 Pairs are emitted in iteration order between the indefinite map
                 marker and the break byte.
                 """
                 yield BEGIN_INDEFINITE_MAP
                 for k, v in it:
                     for piece in streamencode(k):
                         yield piece
                     for piece in streamencode(v):
                         yield piece
                 yield BREAK
             def streamencodebool(b):
                 """Emit the single byte encoding true or false."""
                 # major type 7, simple value 20 and 21.
                 if b:
                     yield b'\xf5'
                 else:
                     yield b'\xf4'
             def streamencodenone(v):
                 """Emit the single byte encoding null; ``v`` itself is not inspected."""
                 # major type 7, simple value 22.
                 null = b'\xf6'
                 yield null
             # Maps the exact Python type of a value to the generator function that
             # encodes it. streamencode() looks values up by ``v.__class__``, so
             # subclasses of these types are not matched (which is also why ``bool``
             # needs its own entry despite being an ``int`` subclass).
             STREAM_ENCODERS = {
                 bytes: streamencodebytestring,
                 int: streamencodeint,
                 pycompat.long: streamencodeint,
                 list: streamencodearray,
                 tuple: streamencodearray,
                 dict: streamencodemap,
                 set: streamencodeset,
                 bool: streamencodebool,
                 type(None): streamencodenone,
             }
             def streamencode(v):
                 """Recursively encode ``v`` to CBOR in a streaming manner.

                 The encoder is chosen from ``STREAM_ENCODERS`` by the exact class of
                 ``v``. The result is a generator of CBOR encoded bytes; individual
                 chunks are not guaranteed to decode to a value or sub-value on their
                 own.

                 Encoding is deterministic - unordered collections are sorted.

                 Raises ``ValueError`` if no encoder is registered for the type.
                 """
                 encoder = STREAM_ENCODERS.get(v.__class__)
                 if encoder:
                     return encoder(v)
                 raise ValueError('do not know how to encode %s' % type(v))
             class CBORDecodeError(Exception):
                 """Represents an error decoding CBOR.

                 Raised by the decoding functions in this module both for input they
                 cannot interpret and for CBOR features they deliberately do not
                 support (for example the string major type or unknown semantic tags).
                 """
             if sys.version_info.major >= 3:
                 def _elementtointeger(b, i):
                     return b[i]
             else:
                 def _elementtointeger(b, i):
                     return ord(b[i])
             # Pre-compiled big-endian unsigned integer readers. decodeuint() selects
             # one of these for subtypes 24, 25, 26 and 27 respectively.
             STRUCT_BIG_UBYTE = struct.Struct(r'>B')
             STRUCT_BIG_USHORT = struct.Struct('>H')
             STRUCT_BIG_ULONG = struct.Struct('>L')
             STRUCT_BIG_ULONGLONG = struct.Struct('>Q')
             # Values for the last element of the tuple returned by decodeitem().
             # SPECIAL_NONE marks a plain value; the others signal the start of a
             # collection or indefinite length bytestring, or the break that ends an
             # indefinite length item.
             SPECIAL_NONE = 0
             SPECIAL_START_INDEFINITE_BYTESTRING = 1
             SPECIAL_START_ARRAY = 2
             SPECIAL_START_MAP = 3
             SPECIAL_START_SET = 4
             SPECIAL_INDEFINITE_BREAK = 5
             def decodeitem(b, offset=0):
                 """Decode a new CBOR value from a buffer at offset.

                 This function attempts to decode up to one complete CBOR value
                 from ``b`` starting at offset ``offset``. The byte at ``offset`` is
                 read unconditionally, so ``offset`` must index an existing byte.

                 The beginning of a collection (such as an array, map, set, or
                 indefinite length bytestring) counts as a single value. For these
                 special cases, a state flag will indicate that a special value was seen.
                 For arrays, maps and sets the returned value is the element count
                 taken from the header.

                 When called, the function either returns a decoded value or gives
                 a hint as to how many more bytes are needed to do so. By calling
                 the function repeatedly given a stream of bytes, the caller can
                 build up the original values.

                 Returns a tuple with the following elements:

                 * Bool indicating whether a complete value was decoded.
                 * A decoded value if first value is True otherwise None
                 * Integer number of bytes. If positive, the number of bytes
                   read. If negative, the number of bytes we need to read to
                   decode this value or the next chunk in this value.
                 * One of the ``SPECIAL_*`` constants indicating special treatment
                   for this value. ``SPECIAL_NONE`` means this is a fully decoded
                   simple value (such as an integer or bool).

                 Raises ``CBORDecodeError`` for the string major type, for semantic
                 tags other than the finite set tag, for a finite set tag not followed
                 by an array, and for unsupported special subtypes.
                 """
                 # Split the initial byte into its major type (3 high bits) and
                 # subtype (5 low bits). ``offset`` now points past the initial byte.
                 initial = _elementtointeger(b, offset)
                 offset += 1
                 majortype = initial >> 5
                 subtype = initial & SUBTYPE_MASK
                 if majortype == MAJOR_TYPE_UINT:
                     complete, value, readcount = decodeuint(subtype, b, offset)
                     if complete:
                         # + 1 accounts for the initial byte consumed above.
                         return True, value, readcount + 1, SPECIAL_NONE
                     else:
                         # readcount is negative here: the count of missing bytes.
                         return False, None, readcount, SPECIAL_NONE
                 elif majortype == MAJOR_TYPE_NEGINT:
                     # Negative integers are the same as UINT except inverted minus 1.
                     complete, value, readcount = decodeuint(subtype, b, offset)
                     if complete:
                         return True, -value - 1, readcount + 1, SPECIAL_NONE
                     else:
                         return False, None, readcount, SPECIAL_NONE
                 elif majortype == MAJOR_TYPE_BYTESTRING:
                     # Beginning of bytestrings are treated as uints in order to
                     # decode their length, which may be indefinite.
                     complete, size, readcount = decodeuint(subtype, b, offset,
                                                            allowindefinite=True)
                     # We don't know the size of the bytestring. It must be a definitive
                     # length since the indefinite subtype would be encoded in the initial
                     # byte.
                     if not complete:
                         return False, None, readcount, SPECIAL_NONE
                     # We know the length of the bytestring.
                     if size is not None:
                         # And the data is available in the buffer.
                         if offset + readcount + size <= len(b):
                             value = b[offset + readcount:offset + readcount + size]
                             return True, value, readcount + size + 1, SPECIAL_NONE
                         # And we need more data in order to return the bytestring.
                         else:
                             # Negative: bytes available past the header minus bytes
                             # required, i.e. minus the number still missing.
                             wanted = len(b) - offset - readcount - size
                             return False, None, wanted, SPECIAL_NONE
                     # It is an indefinite length bytestring.
                     else:
                         # Only the initial byte was consumed; chunks follow as
                         # separate items.
                         return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING
                 elif majortype == MAJOR_TYPE_STRING:
                     raise CBORDecodeError('string major type not supported')
                 elif majortype == MAJOR_TYPE_ARRAY:
                     # Beginning of arrays are treated as uints in order to decode their
                     # length. We don't allow indefinite length arrays.
                     complete, size, readcount = decodeuint(subtype, b, offset)
                     if complete:
                         return True, size, readcount + 1, SPECIAL_START_ARRAY
                     else:
                         return False, None, readcount, SPECIAL_NONE
                 elif majortype == MAJOR_TYPE_MAP:
                     # Beginning of maps are treated as uints in order to decode their
                     # number of elements. We don't allow indefinite length maps.
                     complete, size, readcount = decodeuint(subtype, b, offset)
                     if complete:
                         return True, size, readcount + 1, SPECIAL_START_MAP
                     else:
                         return False, None, readcount, SPECIAL_NONE
                 elif majortype == MAJOR_TYPE_SEMANTIC:
                     # Semantic tag value is read the same as a uint.
                     complete, tagvalue, readcount = decodeuint(subtype, b, offset)
                     if not complete:
                         return False, None, readcount, SPECIAL_NONE
                     # This behavior here is a little wonky. The main type being "decorated"
                     # by this semantic tag follows. A more robust parser would probably emit
                     # a special flag indicating this as a semantic tag and let the caller
                     # deal with the types that follow. But since we don't support many
                     # semantic tags, it is easier to deal with the special cases here and
                     # hide complexity from the caller. If we add support for more semantic
                     # tags, we should probably move semantic tag handling into the caller.
                     if tagvalue == SEMANTIC_TAG_FINITE_SET:
                         # The tagged item's initial byte isn't in the buffer yet;
                         # ask for at least one more byte.
                         if offset + readcount >= len(b):
                             return False, None, -1, SPECIAL_NONE
                         # Decode the item that follows the tag; it must be the start
                         # of an array holding the set's members.
                         complete, size, readcount2, special = decodeitem(b,
                                                                          offset + readcount)
                         if not complete:
                             return False, None, readcount2, SPECIAL_NONE
                         if special != SPECIAL_START_ARRAY:
                             raise CBORDecodeError('expected array after finite set '
                                                   'semantic tag')
                         # Bytes consumed: tag initial byte (1) + tag value bytes
                         # (readcount) + the whole array header (readcount2).
                         return True, size, readcount + readcount2 + 1, SPECIAL_START_SET
                     else:
                         raise CBORDecodeError('semantic tag %d not allowed' % tagvalue)
                 elif majortype == MAJOR_TYPE_SPECIAL:
                     # Only specific values for the information field are allowed.
                     if subtype == SUBTYPE_FALSE:
                         return True, False, 1, SPECIAL_NONE
                     elif subtype == SUBTYPE_TRUE:
                         return True, True, 1, SPECIAL_NONE
                     elif subtype == SUBTYPE_NULL:
                         return True, None, 1, SPECIAL_NONE
                     elif subtype == SUBTYPE_INDEFINITE:
                         return True, None, 1, SPECIAL_INDEFINITE_BREAK
                     # If value is 24, subtype is in next byte.
                     # That form, like every other special subtype, is rejected.
                     else:
                         raise CBORDecodeError('special type %d not allowed' % subtype)
                 else:
                     # A byte shifted right by 5 is always in 0..7, all handled above.
                     assert False
             def decodeuint(subtype, b, offset=0, allowindefinite=False):
                 """Decode the unsigned integer that follows an item's initial byte.

                 ``subtype`` is the low 5 bits of the initial byte of the CBOR item
                 "header". ``b`` is a buffer of bytes and ``offset`` is the index of
                 the first byte after the one ``subtype`` came from.

                 ``allowindefinite`` permits the special indefinite length indicator
                 (subtype 31).

                 Returns a 3-tuple of (successful, value, count):

                 * ``successful`` is a bool telling whether decoding completed.
                 * ``value`` is the decoded integer, or None if decoding did not
                   complete or if the subtype is 31 and ``allowindefinite`` is True.
                 * ``count`` is a byte count. If positive (or zero), it is the number
                   of additional bytes decoded. If negative, it is the number of
                   additional bytes needed to decode this value.

                 Raises ``CBORDecodeError`` for subtype 31 when not allowed and for
                 subtypes 28 through 30.
                 """
                 # Small values are inline.
                 if subtype < 24:
                     return True, subtype, 0
                 # Indefinite length specifier.
                 if subtype == 31:
                     if not allowindefinite:
                         raise CBORDecodeError('indefinite length uint not allowed here')
                     return True, None, 0
                 if subtype >= 28:
                     raise CBORDecodeError(
                         'unsupported subtype on integer type: %d' % subtype)
                 # Subtypes 24..27 select the width of the integer that follows.
                 if subtype == 24:
                     reader = STRUCT_BIG_UBYTE
                 elif subtype == 25:
                     reader = STRUCT_BIG_USHORT
                 elif subtype == 26:
                     reader = STRUCT_BIG_ULONG
                 elif subtype == 27:
                     reader = STRUCT_BIG_ULONGLONG
                 else:
                     raise CBORDecodeError('bounds condition checking violation')
                 width = reader.size
                 if len(b) - offset < width:
                     # Not enough data yet: report the shortfall as a negative count.
                     return False, None, len(b) - offset - width
                 return True, reader.unpack_from(b, offset)[0], width
             class bytestringchunk(bytes):
                 """A chunk/segment of an indefinite length bytestring.

                 Instances behave like ``bytes`` and additionally carry the
                 ``isfirst`` and ``islast`` attributes, which tell whether the chunk
                 is the first or the last one of its indefinite length bytestring.
                 """
                 def __new__(cls, v, first=False, last=False):
                     # bytes is immutable, so the payload has to be supplied at
                     # construction time; the flags are attached afterwards.
                     chunk = bytes.__new__(cls, v)
                     chunk.isfirst, chunk.islast = first, last
                     return chunk
             class sansiodecoder(object):
                 """A CBOR decoder that doesn't perform its own I/O.
                 To use, construct an instance and feed it segments containing
                 CBOR-encoded bytes via ``decode()``. The return value from ``decode()``
                 indicates whether a fully-decoded value is available, how many bytes
                 were consumed, and offers a hint as to how many bytes should be fed
                 in next time to decode the next value.
                 The decoder assumes it will decode N discrete CBOR values, not just
                 a single value. i.e. if the bytestream contains uints packed one after
                 the other, the decoder will decode them all, rather than just the initial
                 one.
                 When ``decode()`` indicates a value is available, call ``getavailable()``
                 to return all fully decoded values.
                 ``decode()`` can partially decode input. It is up to the caller to keep
                 track of what data was consumed and to pass unconsumed data in on the
                 next invocation.
                 The decoder decodes atomically at the *item* level. See ``decodeitem()``.
                 If an *item* cannot be fully decoded, the decoder won't record it as
                 partially consumed. Instead, the caller will be instructed to pass in
                 the initial bytes of this item on the next invocation. This does result
                 in some redundant parsing. But the overhead should be minimal.
                 This decoder only supports a subset of CBOR as required by Mercurial.
                 It lacks support for:
                 * Indefinite length arrays
                 * Indefinite length maps
                 * Use of indefinite length bytestrings as keys or values within
                   arrays, maps, or sets.
                 * Nested arrays, maps, or sets within sets
                 * Any semantic tag that isn't a mathematical finite set
                 * Floating point numbers
                 * Undefined special value
                 CBOR types are decoded to Python types as follows:
                 uint -> int
                 negint -> int
                 bytestring -> bytes
                 map -> dict
                 array -> list
                 True -> bool
                 False -> bool
                 null -> None
                 indefinite length bytestring chunk -> [bytestringchunk]
                 The only non-obvious mapping here is an indefinite length bytestring
                 to the ``bytestringchunk`` type. This is to facilitate streaming
                 indefinite length bytestrings out of the decoder and to differentiate
                 a regular bytestring from an indefinite length bytestring.
                 """
                 _STATE_NONE = 0
                 _STATE_WANT_MAP_KEY = 1
                 _STATE_WANT_MAP_VALUE = 2
                 _STATE_WANT_ARRAY_VALUE = 3
                 _STATE_WANT_SET_VALUE = 4
                 _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
                 _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6
                 def __init__(self):
                     # TODO add support for limiting size of bytestrings
                     # TODO add support for limiting number of keys / values in collections
                     # TODO add support for limiting size of buffered partial values
                     self.decodedbytecount = 0
                     self._state = self._STATE_NONE
                     # Stack of active nested collections. Each entry is a dict describing
                     # the collection.
                     self._collectionstack = []
                     # Fully decoded key to use for the current map.
                     self._currentmapkey = None
                     # Fully decoded values available for retrieval.
                     self._decodedvalues = []
                 @property
                 def inprogress(self):
                     """Whether the decoder has partially decoded a value."""
                     return self._state != self._STATE_NONE
                 def decode(self, b, offset=0):
                     """Attempt to decode bytes from an input buffer.
                     ``b`` is a collection of bytes and ``offset`` is the byte
                     offset within that buffer from which to begin reading data.
                     ``b`` must support ``len()`` and accessing bytes slices via
                     ``__slice__``. Typically ``bytes`` instances are used.
                     Returns a tuple with the following fields:
                     * Bool indicating whether values are available for retrieval.
                     * Integer indicating the number of bytes that were fully consumed,
                       starting from ``offset``.
                     * Integer indicating the number of bytes that are desired for the
                       next call in order to decode an item.
                     """
                     if not b:
                         return bool(self._decodedvalues), 0, 0
                     initialoffset = offset
                     # We could easily split the body of this loop into a function. But
                     # Python performance is sensitive to function calls and collections
                     # are composed of many items. So leaving as a while loop could help
                     # with performance. One thing that may not help is the use of
                     # if..elif versus a lookup/dispatch table. There may be value
                     # in switching that.
                     while offset < len(b):
                         # Attempt to decode an item. This could be a whole value or a
                         # special value indicating an event, such as start or end of a
                         # collection or indefinite length type.
                         complete, value, readcount, special = decodeitem(b, offset)
                         if readcount > 0:
                             self.decodedbytecount += readcount
                         if not complete:
                             assert readcount < 0
                             return (
                                 bool(self._decodedvalues),
                                 offset - initialoffset,
                                 -readcount,
                             )
                         offset += readcount
                         # No nested state. We either have a full value or beginning of a
                         # complex value to deal with.
                         if self._state == self._STATE_NONE:
                             # A normal value.
                             if special == SPECIAL_NONE:
                                 self._decodedvalues.append(value)
                             elif special == SPECIAL_START_ARRAY:
                                 self._collectionstack.append({
                                     'remaining': value,
                                     'v': [],
                                 })
                                 self._state = self._STATE_WANT_ARRAY_VALUE
                             elif special == SPECIAL_START_MAP:
                                 self._collectionstack.append({
                                     'remaining': value,
                                     'v': {},
                                 })
                                 self._state = self._STATE_WANT_MAP_KEY
                             elif special == SPECIAL_START_SET:
                                 self._collectionstack.append({
                                     'remaining': value,
                                     'v': set(),
                                 })
                                 self._state = self._STATE_WANT_SET_VALUE
                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST
                             else:
                                 raise CBORDecodeError('unhandled special state: %d' %
                                                       special)
                         # This value becomes an element of the current array.
                         elif self._state == self._STATE_WANT_ARRAY_VALUE:
                             # Simple values get appended.
                             if special == SPECIAL_NONE:
                                 c = self._collectionstack[-1]
                                 c['v'].append(value)
                                 c['remaining'] -= 1
                                 # self._state doesn't need changed.
                             # An array nested within an array.
                             elif special == SPECIAL_START_ARRAY:
                                 lastc = self._collectionstack[-1]
                                 newvalue = []
                                 lastc['v'].append(newvalue)
                                 lastc['remaining'] -= 1
                                 self._collectionstack.append({
                                     'remaining': value,
                                     'v': newvalue,
                                 })
                                 # self._state doesn't need changed.
                             # A map nested within an array.
                             elif special == SPECIAL_START_MAP:
                                 lastc = self._collectionstack[-1]
                                 newvalue = {}
                                 lastc['v'].append(newvalue)
                                 lastc['remaining'] -= 1
                                 self._collectionstack.append({
                                     'remaining': value,
                                     'v': newvalue
                                 })
                                 self._state = self._STATE_WANT_MAP_KEY
                             elif special == SPECIAL_START_SET:
                                 lastc = self._collectionstack[-1]
                                 newvalue = set()
                                 lastc['v'].append(newvalue)
                                 lastc['remaining'] -= 1
                                 self._collectionstack.append({
                                     'remaining': value,
                                     'v': newvalue,
                                 })
                                 self._state = self._STATE_WANT_SET_VALUE
                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 raise CBORDecodeError('indefinite length bytestrings '
                                                       'not allowed as array values')
                             else:
                                 raise CBORDecodeError('unhandled special item when '
                                                       'expecting array value: %d' % special)
                         # This value becomes the key of the current map instance.
                         elif self._state == self._STATE_WANT_MAP_KEY:
                             if special == SPECIAL_NONE:
                                 self._currentmapkey = value
                                 self._state = self._STATE_WANT_MAP_VALUE
                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 raise CBORDecodeError('indefinite length bytestrings '
                                                       'not allowed as map keys')
                             elif special in (SPECIAL_START_ARRAY, SPECIAL_START_MAP,
                                              SPECIAL_START_SET):
                                 raise CBORDecodeError('collections not supported as map '
                                                       'keys')
                             # We do not allow special values to be used as map keys.
                             else:
                                 raise CBORDecodeError('unhandled special item when '
                                                       'expecting map key: %d' % special)
                         # This value becomes the value of the current map key.
                         elif self._state == self._STATE_WANT_MAP_VALUE:
                             # Simple values simply get inserted into the map.
                             if special == SPECIAL_NONE:
                                 lastc = self._collectionstack[-1]
                                 lastc['v'][self._currentmapkey] = value
                                 lastc['remaining'] -= 1
                                 self._state = self._STATE_WANT_MAP_KEY
                             # A new array is used as the map value.
                             elif special == SPECIAL_START_ARRAY:
                                 lastc = self._collectionstack[-1]
                                 newvalue = []
                                 lastc['v'][self._currentmapkey] = newvalue
                                 lastc['remaining'] -= 1
                                 self._collectionstack.append({
                                     'remaining': value,
                                     'v': newvalue,
                                 })
                                 self._state = self._STATE_WANT_ARRAY_VALUE
                             # A new map is used as the map value.
                             elif special == SPECIAL_START_MAP:
                                 lastc = self._collectionstack[-1]
                                 newvalue = {}
                                 lastc['v'][self._currentmapkey] = newvalue
                                 lastc['remaining'] -= 1
                                 self._collectionstack.append({
                                     'remaining': value,
                                     'v': newvalue,
                                 })
                                 self._state = self._STATE_WANT_MAP_KEY
                             # A new set is used as the map value.
                             elif special == SPECIAL_START_SET:
                                 lastc = self._collectionstack[-1]
                                 newvalue = set()
                                 lastc['v'][self._currentmapkey] = newvalue
                                 lastc['remaining'] -= 1
                                 self._collectionstack.append({
                                     'remaining': value,
                                     'v': newvalue,
                                 })
                                 self._state = self._STATE_WANT_SET_VALUE
                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 raise CBORDecodeError('indefinite length bytestrings not '
                                                       'allowed as map values')
                             else:
                                 raise CBORDecodeError('unhandled special item when '
                                                       'expecting map value: %d' % special)
                             self._currentmapkey = None
                         # This value is added to the current set.
                         elif self._state == self._STATE_WANT_SET_VALUE:
                             if special == SPECIAL_NONE:
                                 lastc = self._collectionstack[-1]
                                 lastc['v'].add(value)
                                 lastc['remaining'] -= 1
                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 raise CBORDecodeError('indefinite length bytestrings not '
                                                       'allowed as set values')
                             elif special in (SPECIAL_START_ARRAY,
                                              SPECIAL_START_MAP,
                                              SPECIAL_START_SET):
                                 raise CBORDecodeError('collections not allowed as set '
                                                       'values')
                             # We don't allow non-trivial types to exist as set values.
                             else:
                                 raise CBORDecodeError('unhandled special item when '
                                                       'expecting set value: %d' % special)
                         # This value represents the first chunk in an indefinite length
                         # bytestring.
                         elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
                             # We received a full chunk.
                             if special == SPECIAL_NONE:
                                 self._decodedvalues.append(bytestringchunk(value,
                                                                            first=True))
                                 self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT
                             # The end of stream marker. This means it is an empty
                             # indefinite length bytestring.
                             elif special == SPECIAL_INDEFINITE_BREAK:
                                 # We /could/ convert this to a b''. But we want to preserve
                                 # the nature of the underlying data so consumers expecting
                                 # an indefinite length bytestring get one.
                                 self._decodedvalues.append(bytestringchunk(b'',
                                                                            first=True,
                                                                            last=True))
                                 # Since indefinite length bytestrings can't be used in
                                 # collections, we must be at the root level.
                                 assert not self._collectionstack
                                 self._state = self._STATE_NONE
                             else:
                                 raise CBORDecodeError('unexpected special value when '
                                                       'expecting bytestring chunk: %d' %
                                                       special)
                         # This value represents the non-initial chunk in an indefinite
                         # length bytestring.
                         elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
                             # We received a full chunk.
                             if special == SPECIAL_NONE:
                                 self._decodedvalues.append(bytestringchunk(value))
                             # The end of stream marker.
                             elif special == SPECIAL_INDEFINITE_BREAK:
                                 self._decodedvalues.append(bytestringchunk(b'', last=True))
                                 # Since indefinite length bytestrings can't be used in
                                 # collections, we must be at the root level.
                                 assert not self._collectionstack
                                 self._state = self._STATE_NONE
                             else:
                                 raise CBORDecodeError('unexpected special value when '
                                                       'expecting bytestring chunk: %d' %
                                                       special)
                         else:
                             raise CBORDecodeError('unhandled decoder state: %d' %
                                                   self._state)
                         # We could have just added the final value in a collection. End
                         # all complete collections at the top of the stack.
                         while True:
                             # Bail if we're not waiting on a new collection item.
                             if self._state not in (self._STATE_WANT_ARRAY_VALUE,
                                                    self._STATE_WANT_MAP_KEY,
                                                    self._STATE_WANT_SET_VALUE):
                                 break
                             # Or we are expecting more items for this collection.
                             lastc = self._collectionstack[-1]
                             if lastc['remaining']:
                                 break
                             # The collection at the top of the stack is complete.
                             # Discard it, as it isn't needed for future items.
                             self._collectionstack.pop()
                             # If this is a nested collection, we don't emit it, since it
                             # will be emitted by its parent collection. But we do need to
                             # update state to reflect what the new top-most collection
                             # on the stack is.
                             if self._collectionstack:
                                 self._state = {
                                     list: self._STATE_WANT_ARRAY_VALUE,
                                     dict: self._STATE_WANT_MAP_KEY,
                                     set: self._STATE_WANT_SET_VALUE,
                                 }[type(self._collectionstack[-1]['v'])]
                             # If this is the root collection, emit it.
                             else:
                                 self._decodedvalues.append(lastc['v'])
                                 self._state = self._STATE_NONE
                     return (
                         bool(self._decodedvalues),
                         offset - initialoffset,
 ,
                     )
                 def getavailable(self):
                     """Hand back every value decoded so far and forget them.

                     The pending values are returned as a new list. The internal queue
                     is reset, so a value is only ever reported by a single call.
                     """
                     drained, self._decodedvalues = list(self._decodedvalues), []
                     return drained
             class bufferingdecoder(object):
                 """A CBOR decoder that buffers undecoded input.

                 This is a glorified wrapper around ``sansiodecoder`` that adds a buffering
                 layer. All input that isn't consumed by ``sansiodecoder`` will be buffered
                 and concatenated with any new input that arrives later.

                 TODO consider adding limits as to the maximum amount of data that can
                 be buffered.
                 """

                 def __init__(self):
                     self._decoder = sansiodecoder()
                     # Input chunks received but not yet consumed by the decoder.
                     self._chunks = []
                     # Number of additional bytes still needed before another decode
                     # attempt is worthwhile, as last reported by the decoder and
                     # reduced by whatever has been buffered since.
                     self._wanted = 0

                 def decode(self, b):
                     """Attempt to decode bytes to CBOR values.

                     Returns a tuple with the following fields:

                     * Bool indicating whether new values are available for retrieval.
                     * Integer number of bytes decoded from the new input. This is 0
                       when the input was only buffered and no decode was attempted.
                     * Integer number of bytes wanted to decode the next value.
                     """
                     # Our strategy for buffering is to aggregate the incoming chunks in a
                     # list until we've received enough data to decode the next item.
                     # This is slightly more complicated than using an ``io.BytesIO``
                     # or continuously concatenating incoming data. However, because it
                     # isn't constantly reallocating backing memory for a growing buffer,
                     # it prevents excessive memory thrashing and is significantly faster,
                     # especially in cases where the percentage of input chunks that don't
                     # decode into a full item is high.
                     if self._chunks:
                         # A previous call said we needed N bytes to decode the next item.
                         # But this call doesn't provide enough data. We buffer the incoming
                         # chunk without attempting to decode.
                         if len(b) < self._wanted:
                             self._chunks.append(b)
                             self._wanted -= len(b)
                             return False, 0, self._wanted

                         # Else we may have enough data to decode the next item. Aggregate
                         # old data with new and reset the buffer.
                         newlen = len(b)
                         self._chunks.append(b)
                         b = b''.join(self._chunks)
                         self._chunks = []
                         # Bytes at the front of ``b`` that came from earlier calls.
                         oldlen = len(b) - newlen
                     else:
                         oldlen = 0

                     available, readcount, wanted = self._decoder.decode(b)
                     self._wanted = wanted

                     # Keep whatever the decoder did not consume for the next call.
                     if readcount < len(b):
                         self._chunks.append(b[readcount:])

                     # Subtract previously buffered bytes so that the count is relative
                     # to the input passed to this call.
                     return available, readcount - oldlen, wanted

                 def getavailable(self):
                     """Return the decoded values held by the wrapped decoder."""
                     return self._decoder.getavailable()
             def decodeall(b):
                 """Decode a buffer that must consist solely of complete CBOR values.

                 An empty (falsy) input yields an empty list. Otherwise the whole buffer
                 is fed to a fresh ``sansiodecoder`` and the decoded values are returned.

                 Besides the errors the decoder itself raises, ``CBORDecodeError`` is
                 raised when the decoder stops before the end of the buffer or when it is
                 still in the middle of a value once the buffer is exhausted.
                 """
                 if not b:
                     return []

                 decoder = sansiodecoder()
                 # Only the consumed byte count matters here; the other two fields
                 # of the result are deliberately ignored.
                 _available, consumed, _wanted = decoder.decode(b)

                 if consumed != len(b):
                     raise CBORDecodeError('input data not fully consumed')

                 if decoder.inprogress:
                     raise CBORDecodeError('input data not complete')

                 return decoder.getavailable()