upstream/mercurial-mirror Commit - r49797:bce8f66d

1

# cborutil.py - CBOR extensions

1

# cborutil.py - CBOR extensions

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

9

import struct

9

import struct

10

import sys

11

10

12

11

13

# Very short version of RFC 7049...

12

# Very short version of RFC 7049...

14

#

13

#

15

# Each item begins with a byte. The 3 high bits of that byte denote the

14

# Each item begins with a byte. The 3 high bits of that byte denote the

16

# "major type." The lower 5 bits denote the "subtype." Each major type

15

# "major type." The lower 5 bits denote the "subtype." Each major type

17

# has its own encoding mechanism.

16

# has its own encoding mechanism.

18

#

17

#

19

# Most types have lengths. However, bytestring, string, array, and map

18

# Most types have lengths. However, bytestring, string, array, and map

20

# can be indefinite length. These are denoted by a subtype with value 31.

19

# can be indefinite length. These are denoted by a subtype with value 31.

21

# Sub-components of those types then come afterwards and are terminated

20

# Sub-components of those types then come afterwards and are terminated

22

# by a "break" byte.

21

# by a "break" byte.

23

22

24

# Major types: the value carried in the 3 high bits of an item's first byte.
MAJOR_TYPE_UINT = 0
MAJOR_TYPE_NEGINT = 1
MAJOR_TYPE_BYTESTRING = 2
MAJOR_TYPE_STRING = 3
MAJOR_TYPE_ARRAY = 4
MAJOR_TYPE_MAP = 5
MAJOR_TYPE_SEMANTIC = 6
MAJOR_TYPE_SPECIAL = 7

# Selects the low 5 bits (the "subtype") of an item's first byte.
SUBTYPE_MASK = 0x1F

# Subtype values that carry a dedicated meaning.
SUBTYPE_FALSE = 20
SUBTYPE_TRUE = 21
SUBTYPE_NULL = 22
SUBTYPE_HALF_FLOAT = 25
SUBTYPE_SINGLE_FLOAT = 26
SUBTYPE_DOUBLE_FLOAT = 27
SUBTYPE_INDEFINITE = 31

# Semantic tag used to mark an array as a finite set.
SEMANTIC_TAG_FINITE_SET = 258

# An indefinite length item opens with a single byte: its major type in the
# high bits ORed with the indefinite subtype (31) in the low bits.
BEGIN_INDEFINITE_BYTESTRING = struct.pack(
    '>B', (MAJOR_TYPE_BYTESTRING << 5) | SUBTYPE_INDEFINITE
)
BEGIN_INDEFINITE_ARRAY = struct.pack(
    '>B', (MAJOR_TYPE_ARRAY << 5) | SUBTYPE_INDEFINITE
)
BEGIN_INDEFINITE_MAP = struct.pack(
    '>B', (MAJOR_TYPE_MAP << 5) | SUBTYPE_INDEFINITE
)

# Packers for an item header: the initial byte alone, or the initial byte
# followed by a 1, 2, 4, or 8 byte big-endian unsigned integer.
ENCODED_LENGTH_1 = struct.Struct('>B')
ENCODED_LENGTH_2 = struct.Struct('>BB')
ENCODED_LENGTH_3 = struct.Struct('>BH')
ENCODED_LENGTH_4 = struct.Struct('>BL')
ENCODED_LENGTH_5 = struct.Struct('>BQ')

# The break byte terminates an indefinite length item. It is available both
# as an integer and as a one-byte bytestring.
BREAK_INT = 0xFF
BREAK = struct.pack('>B', BREAK_INT)

65

64

66

65

67

def encodelength(majortype, length):
    """Return the header bytes for ``majortype`` carrying ``length``.

    The major type is placed in the 3 high bits of the first byte. Values
    below 24 are stored directly in the low 5 bits of that byte. Larger
    values are stored in 1, 2, 4, or 8 trailing big-endian bytes, with the
    low 5 bits set to 24, 25, 26, or 27 respectively.
    """
    typebits = majortype << 5

    if length < 24:
        return ENCODED_LENGTH_1.pack(typebits | length)
    if length < 0x100:
        return ENCODED_LENGTH_2.pack(typebits | 24, length)
    if length < 0x10000:
        return ENCODED_LENGTH_3.pack(typebits | 25, length)
    if length < 0x100000000:
        return ENCODED_LENGTH_4.pack(typebits | 26, length)

    return ENCODED_LENGTH_5.pack(typebits | 27, length)

79

78

80

79

81

def streamencodebytestring(v):
    """Encode ``v`` as a definite length bytestring.

    This is a generator emitting the length header and then ``v`` itself.
    """
    header = encodelength(MAJOR_TYPE_BYTESTRING, len(v))
    yield header
    yield v

84

83

85

84

86

def streamencodebytestringfromiter(it):
    """Encode an iterable of chunks as an indefinite length bytestring.

    Each element of ``it`` must be representable as bytes. Every element
    is emitted as its own definite length bytestring, framed by the
    indefinite bytestring marker and a terminating break.
    """
    yield BEGIN_INDEFINITE_BYTESTRING

    for piece in it:
        # Each chunk is a length header followed by the chunk's bytes.
        yield from streamencodebytestring(piece)

    yield BREAK

99

98

100

99

101

def streamencodeindefinitebytestring(source, chunksize=65536):
    """Encode a large buffer as an indefinite length bytestring.

    ``source`` is sliced into pieces of at most ``chunksize`` bytes, and
    each piece is emitted as a definite length bytestring. This is a
    generator of chunks constituting the encoded CBOR data.

    At least one piece is always emitted, so an empty ``source`` produces
    a single zero-length chunk between the start marker and the break.
    """
    yield BEGIN_INDEFINITE_BYTESTRING

    total = len(source)
    position = 0

    while True:
        piece = source[position : position + chunksize]
        position += len(piece)

        yield encodelength(MAJOR_TYPE_BYTESTRING, len(piece))
        yield piece

        # The exit test comes after the emit so the loop body runs once
        # even when there is nothing to slice.
        if position >= total:
            break

    yield BREAK

122

121

123

122

124

def streamencodeint(v):
    """Encode the integer ``v`` as a CBOR uint or negint.

    Raises ``ValueError`` if ``v`` is outside ``[-2**64, 2**64 - 1]``,
    the range that fits in an 8 byte argument.
    """
    limit = 1 << 64

    if not -limit <= v < limit:
        raise ValueError(b'big integers not supported')

    if v < 0:
        # A negative integer n is stored as the unsigned value -1 - n.
        yield encodelength(MAJOR_TYPE_NEGINT, -v - 1)
    else:
        yield encodelength(MAJOR_TYPE_UINT, v)

132

131

133

132

134

def streamencodearray(l):
    """Encode a sized iterable as a definite length array.

    The element count is emitted first, followed by the encoding of each
    element in iteration order.
    """
    yield encodelength(MAJOR_TYPE_ARRAY, len(l))

    for item in l:
        yield from streamencode(item)

142

141

143

142

144

def streamencodearrayfromiter(it):
    """Encode an iterator of items as an indefinite length array.

    The items are framed by the indefinite array marker and a break, so
    the number of items need not be known up front.
    """
    yield BEGIN_INDEFINITE_ARRAY

    for item in it:
        yield from streamencode(item)

    yield BREAK

154

153

155

154

156

def _mixedtypesortkey(v):

155

def _mixedtypesortkey(v):

157

return type(v).__name__, v

156

return type(v).__name__, v

158

157

159

158

160

def streamencodeset(s):
    """Encode a set as a tagged, sorted array.

    https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
    semantic tag 258 for finite sets. The tag is emitted first, then the
    members as a definite length array, ordered by ``_mixedtypesortkey``
    so the output is deterministic.
    """
    yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)

    members = sorted(s, key=_mixedtypesortkey)
    yield from streamencodearray(members)

167

166

168

167

169

def streamencodemap(d):
    """Encode a dictionary as a definite length map.

    This is a generator. The entry count is emitted first, then each key
    followed by its value, with entries ordered by ``_mixedtypesortkey``
    applied to the key.

    Indefinite length maps are not produced here.
    """
    yield encodelength(MAJOR_TYPE_MAP, len(d))

    entries = sorted(d.items(), key=lambda entry: _mixedtypesortkey(entry[0]))

    for key, value in entries:
        yield from streamencode(key)
        yield from streamencode(value)

181

180

182

181

183

def streamencodemapfromiter(it):
    """Encode an iterable of (key, value) pairs as an indefinite length map.

    Pairs are emitted in iteration order, framed by the indefinite map
    marker and a break.
    """
    yield BEGIN_INDEFINITE_MAP

    for key, value in it:
        yield from streamencode(key)
        yield from streamencode(value)

    yield BREAK

194

193

195

194

196

def streamencodebool(b):
    """Encode a boolean as a single byte.

    Booleans are major type 7 with simple value 21 (true, byte 0xf5) or
    20 (false, byte 0xf4).
    """
    if b:
        yield b'\xf5'
    else:
        yield b'\xf4'

199

198

200

199

201

def streamencodenone(v):
    """Encode ``None`` as a single byte.

    Null is major type 7 with simple value 22 (byte 0xf6). The argument
    is accepted for signature parity with the other encoders and is not
    inspected.
    """
    yield b'\xf6'

204

203

205

204

206

# Maps a Python type to the generator function that encodes values of that
# type. ``streamencode()`` first looks a value's exact class up here and
# then falls back to an ``isinstance()`` scan in this dict's iteration
# order, so the order of entries is significant for subclasses.
#
# ``int`` is listed once: repeating the same key in a dict literal has no
# effect beyond the first occurrence.
STREAM_ENCODERS = {
    bytes: streamencodebytestring,
    int: streamencodeint,
    list: streamencodearray,
    tuple: streamencodearray,
    dict: streamencodemap,
    set: streamencodeset,
    bool: streamencodebool,
    type(None): streamencodenone,
}

217

216

218

217

219

def streamencode(v):
    """Encode a value in a streaming manner.

    Given an input object, encode it to CBOR recursively.

    Returns a generator of CBOR encoded bytes. There is no guarantee
    that each emitted chunk fully decodes to a value or sub-value.

    Encoding is deterministic - unordered collections are sorted.

    Raises ``ValueError`` if no encoder is registered for the type of
    ``v`` or any of its base types.
    """
    fn = STREAM_ENCODERS.get(v.__class__)

    if not fn:
        # handle subtypes such as encoding.localstr and util.sortdict
        for ty, candidate in STREAM_ENCODERS.items():
            if isinstance(v, ty):
                fn = candidate
                break

    if not fn:
        # bytes %-formatting only accepts bytes-like operands for %s, so
        # the type must be rendered to bytes explicitly. Passing the type
        # object itself would raise TypeError instead of this ValueError.
        typename = str(type(v)).encode('utf-8')
        raise ValueError(b'do not know how to encode %s' % typename)

    return fn(v)

243

242

244

243

245

class CBORDecodeError(Exception):
    """Raised when CBOR data cannot be decoded."""

247

246

248

247

249

if sys.version_info.major >= 3:

248

def _elementtointeger(b, i):

250

249

return b[i]

251

def _elementtointeger(b, i):

252

return b[i]

253

254

255

else:

256

257

def _elementtointeger(b, i):

258

return ord(b[i])

259

250

260

251

261

# Big-endian unsigned integer unpackers of 1, 2, 4, and 8 bytes.
STRUCT_BIG_UBYTE = struct.Struct('>B')
STRUCT_BIG_USHORT = struct.Struct('>H')
STRUCT_BIG_ULONG = struct.Struct('>L')
STRUCT_BIG_ULONGLONG = struct.Struct('>Q')

# Flags reported by ``decodeitem()`` as the last element of its result to
# tell the caller that the decoded item needs special treatment.
SPECIAL_NONE = 0
SPECIAL_START_INDEFINITE_BYTESTRING = 1
SPECIAL_START_ARRAY = 2
SPECIAL_START_MAP = 3
SPECIAL_START_SET = 4
SPECIAL_INDEFINITE_BREAK = 5

272

263

273

264

274

def decodeitem(b, offset=0):
    """Decode at most one CBOR item from ``b`` starting at ``offset``.

    The beginning of a collection (an array, map, set, or indefinite
    length bytestring) counts as a single item. For those cases the last
    element of the result flags that a special value was seen.

    The function either returns a decoded value or hints at how many more
    bytes are needed to produce one. Calling it repeatedly over a stream
    of bytes lets the caller rebuild the original values.

    Returns a 4-tuple of:

    * Bool indicating whether a complete value was decoded.
    * The decoded value if the first element is True, otherwise None.
      For the start of an array, map, or set this is the decoded length.
    * Integer number of bytes. If positive, the number of bytes read.
      If negative, the number of bytes still needed to decode this value
      or the next chunk in this value.
    * One of the ``SPECIAL_*`` constants indicating special treatment
      for this value. ``SPECIAL_NONE`` means a fully decoded simple
      value (such as an integer or bool).

    Raises ``CBORDecodeError`` for the string major type, for semantic
    tags other than the finite set tag, for a finite set tag that is not
    followed by an array, and for unsupported special subtypes.
    """
    header = _elementtointeger(b, offset)
    offset += 1

    majortype = header >> 5
    subtype = header & SUBTYPE_MASK

    if majortype == MAJOR_TYPE_UINT:
        complete, value, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # +1 accounts for the initial byte consumed above.
        return True, value, readcount + 1, SPECIAL_NONE

    if majortype == MAJOR_TYPE_NEGINT:
        # Negative integers are read like a uint, then mapped to -1 - n.
        complete, value, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        return True, -value - 1, readcount + 1, SPECIAL_NONE

    if majortype == MAJOR_TYPE_BYTESTRING:
        # The length is read like a uint. It may be the indefinite marker.
        complete, size, readcount = decodeuint(
            subtype, b, offset, allowindefinite=True
        )

        # The length bytes themselves are not all available yet. This can
        # only happen for a definite length, since the indefinite marker
        # lives in the initial byte.
        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # No size means this is the start of an indefinite length
        # bytestring; only the initial byte has been consumed.
        if size is None:
            return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING

        start = offset + readcount
        end = start + size

        # The payload is not fully buffered: report the shortfall as a
        # negative count.
        if end > len(b):
            return False, None, len(b) - end, SPECIAL_NONE

        return True, b[start:end], readcount + size + 1, SPECIAL_NONE

    if majortype == MAJOR_TYPE_STRING:
        raise CBORDecodeError(b'string major type not supported')

    if majortype == MAJOR_TYPE_ARRAY:
        # The element count is read like a uint. Indefinite length arrays
        # are not allowed.
        complete, size, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        return True, size, readcount + 1, SPECIAL_START_ARRAY

    if majortype == MAJOR_TYPE_MAP:
        # The entry count is read like a uint. Indefinite length maps are
        # not allowed.
        complete, size, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        return True, size, readcount + 1, SPECIAL_START_MAP

    if majortype == MAJOR_TYPE_SEMANTIC:
        # The tag value is read like a uint.
        complete, tagvalue, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # The item decorated by the tag follows it. Only the finite set
        # tag is supported, so it is handled here rather than surfacing
        # tags to the caller. If more tags gain support, this handling
        # should probably move into the caller.
        if tagvalue != SEMANTIC_TAG_FINITE_SET:
            raise CBORDecodeError(b'semantic tag %d not allowed' % tagvalue)

        # At least the initial byte of the tagged item is required.
        if offset + readcount >= len(b):
            return False, None, -1, SPECIAL_NONE

        complete, size, readcount2, special = decodeitem(
            b, offset + readcount
        )

        if not complete:
            return False, None, readcount2, SPECIAL_NONE

        if special != SPECIAL_START_ARRAY:
            raise CBORDecodeError(
                b'expected array after finite set semantic tag'
            )

        return True, size, readcount + readcount2 + 1, SPECIAL_START_SET

    if majortype == MAJOR_TYPE_SPECIAL:
        # Only specific values for the information field are allowed.
        if subtype == SUBTYPE_FALSE:
            return True, False, 1, SPECIAL_NONE
        if subtype == SUBTYPE_TRUE:
            return True, True, 1, SPECIAL_NONE
        if subtype == SUBTYPE_NULL:
            return True, None, 1, SPECIAL_NONE
        if subtype == SUBTYPE_INDEFINITE:
            return True, None, 1, SPECIAL_INDEFINITE_BREAK

        raise CBORDecodeError(b'special type %d not allowed' % subtype)

    # Every major type is handled by one of the branches above.
    assert False

426

417

427

418

428

def decodeuint(subtype, b, offset=0, allowindefinite=False):
    """Decode an unsigned integer.

    ``subtype`` is the lower 5 bits of the initial byte of a CBOR item.
    ``b`` is a buffer containing bytes. ``offset`` is the index of the
    first byte after the byte that ``subtype`` was derived from.

    ``allowindefinite`` permits the special indefinite length indicator
    (subtype 31).

    Returns a 3-tuple of (successful, value, count).

    ``successful`` is a bool indicating whether decoding completed.
    ``value`` is the decoded integer, or None if decoding did not complete
    or if the subtype is 31 and ``allowindefinite`` is True. ``count`` is
    a byte count: if positive, the number of additional bytes decoded; if
    negative, the number of additional bytes needed to decode this value.

    Raises ``CBORDecodeError`` for subtype 31 when ``allowindefinite`` is
    False and for the unsupported subtypes 28 through 30.
    """
    # Small values are stored inline in the subtype itself.
    if subtype < 24:
        return True, subtype, 0

    # Indefinite length specifier.
    if subtype == 31:
        if not allowindefinite:
            raise CBORDecodeError(b'indefinite length uint not allowed here')
        return True, None, 0

    if subtype >= 28:
        raise CBORDecodeError(
            b'unsupported subtype on integer type: %d' % subtype
        )

    # Subtypes 24 through 27 select the width of the trailing integer.
    if subtype == 24:
        unpacker = STRUCT_BIG_UBYTE
    elif subtype == 25:
        unpacker = STRUCT_BIG_USHORT
    elif subtype == 26:
        unpacker = STRUCT_BIG_ULONG
    elif subtype == 27:
        unpacker = STRUCT_BIG_ULONGLONG
    else:
        raise CBORDecodeError(b'bounds condition checking violation')

    available = len(b) - offset

    # Not enough bytes buffered: report the shortfall as a negative count.
    if available < unpacker.size:
        return False, None, available - unpacker.size

    return True, unpacker.unpack_from(b, offset)[0], unpacker.size

477

468

478

469

479

class bytestringchunk(bytes):
    """A chunk/segment of an indefinite length bytestring.

    Instances behave like ``bytes`` and additionally carry ``isfirst``
    and ``islast`` attributes telling whether the chunk is the first or
    the last one of its indefinite length bytestring.
    """

    def __new__(cls, v, first=False, last=False):
        chunk = super().__new__(cls, v)

        # bytes is immutable, so the flags are attached while the instance
        # is being created rather than in __init__.
        chunk.isfirst = first
        chunk.islast = last

        return chunk

493

484

494

485

495

class sansiodecoder(object):
    """A CBOR decoder that doesn't perform its own I/O.

    To use, construct an instance and feed it segments containing
    CBOR-encoded bytes via ``decode()``. The return value from ``decode()``
    indicates whether a fully-decoded value is available, how many bytes
    were consumed, and offers a hint as to how many bytes should be fed
    in next time to decode the next value.

    The decoder assumes it will decode N discrete CBOR values, not just
    a single value. i.e. if the bytestream contains uints packed one after
    the other, the decoder will decode them all, rather than just the initial
    one.

    When ``decode()`` indicates a value is available, call ``getavailable()``
    to return all fully decoded values.

    ``decode()`` can partially decode input. It is up to the caller to keep
    track of what data was consumed and to pass unconsumed data in on the
    next invocation.

    The decoder decodes atomically at the *item* level. See ``decodeitem()``.
    If an *item* cannot be fully decoded, the decoder won't record it as
    partially consumed. Instead, the caller will be instructed to pass in
    the initial bytes of this item on the next invocation. This does result
    in some redundant parsing. But the overhead should be minimal.

    This decoder only supports a subset of CBOR as required by Mercurial.
    It lacks support for:

    * Indefinite length arrays
    * Indefinite length maps
    * Use of indefinite length bytestrings as keys or values within
      arrays, maps, or sets.
    * Nested arrays, maps, or sets within sets
    * Any semantic tag that isn't a mathematical finite set
    * Floating point numbers
    * Undefined special value

    CBOR types are decoded to Python types as follows:

    uint -> int
    negint -> int
    bytestring -> bytes
    map -> dict
    array -> list
    True -> bool
    False -> bool
    null -> None
    indefinite length bytestring chunk -> [bytestringchunk]

    The only non-obvious mapping here is an indefinite length bytestring
    to the ``bytestringchunk`` type. This is to facilitate streaming
    indefinite length bytestrings out of the decoder and to differentiate
    a regular bytestring from an indefinite length bytestring.
    """

    # Decoder states. ``_STATE_NONE`` means no value is partially decoded;
    # every other state names the kind of item the decoder expects next.
    _STATE_NONE = 0
    _STATE_WANT_MAP_KEY = 1
    _STATE_WANT_MAP_VALUE = 2
    _STATE_WANT_ARRAY_VALUE = 3
    _STATE_WANT_SET_VALUE = 4
    _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
    _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6

    def __init__(self):
        """Create a decoder with no buffered values and no nested state."""
        # TODO add support for limiting size of bytestrings
        # TODO add support for limiting number of keys / values in collections
        # TODO add support for limiting size of buffered partial values

        # Running total of the positive read counts reported by
        # ``decodeitem()`` across all ``decode()`` calls.
        self.decodedbytecount = 0

        self._state = self._STATE_NONE

        # Stack of active nested collections. Each entry is a dict describing
        # the collection: ``b'remaining'`` is the number of entries still
        # expected and ``b'v'`` is the list/dict/set being populated.
        self._collectionstack = []

        # Fully decoded key to use for the current map.
        self._currentmapkey = None

        # Fully decoded values available for retrieval.
        self._decodedvalues = []

    @property
    def inprogress(self):
        """Whether the decoder has partially decoded a value."""
        return self._state != self._STATE_NONE

    def decode(self, b, offset=0):
        """Attempt to decode bytes from an input buffer.

        ``b`` is a collection of bytes and ``offset`` is the byte
        offset within that buffer from which to begin reading data.

        ``b`` must support ``len()`` and slicing. Typically ``bytes``
        instances are used.

        Returns a tuple with the following fields:

        * Bool indicating whether values are available for retrieval.
        * Integer indicating the number of bytes that were fully consumed,
          starting from ``offset``.
        * Integer indicating the number of bytes that are desired for the
          next call in order to decode an item.

        Raises ``CBORDecodeError`` when an item is encountered that is not
        permitted in the current decoder state.
        """
        # Nothing to read: just report whether earlier calls left values
        # waiting to be collected.
        if not b:
            return bool(self._decodedvalues), 0, 0

        initialoffset = offset

        # We could easily split the body of this loop into a function. But
        # Python performance is sensitive to function calls and collections
        # are composed of many items. So leaving as a while loop could help
        # with performance. One thing that may not help is the use of
        # if..elif versus a lookup/dispatch table. There may be value
        # in switching that.
        while offset < len(b):
            # Attempt to decode an item. This could be a whole value or a
            # special value indicating an event, such as start or end of a
            # collection or indefinite length type.
            complete, value, readcount, special = decodeitem(b, offset)

            if readcount > 0:
                self.decodedbytecount += readcount

            # An incomplete item is reported with a negative read count. Its
            # negation is handed back as the "bytes wanted" hint, and
            # ``offset`` is left untouched so the item's bytes are not
            # counted as consumed.
            if not complete:
                assert readcount < 0
                return (
                    bool(self._decodedvalues),
                    offset - initialoffset,
                    -readcount,
                )

            offset += readcount

            # No nested state. We either have a full value or beginning of a
            # complex value to deal with.
            if self._state == self._STATE_NONE:
                # A normal value.
                if special == SPECIAL_NONE:
                    self._decodedvalues.append(value)

                # For the collection start events handled below, ``value``
                # is stored as the collection's ``b'remaining'`` count.
                elif special == SPECIAL_START_ARRAY:
                    self._collectionstack.append(
                        {
                            b'remaining': value,
                            b'v': [],
                        }
                    )
                    self._state = self._STATE_WANT_ARRAY_VALUE

                elif special == SPECIAL_START_MAP:
                    self._collectionstack.append(
                        {
                            b'remaining': value,
                            b'v': {},
                        }
                    )
                    self._state = self._STATE_WANT_MAP_KEY

                elif special == SPECIAL_START_SET:
                    self._collectionstack.append(
                        {
                            b'remaining': value,
                            b'v': set(),
                        }
                    )
                    self._state = self._STATE_WANT_SET_VALUE

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST

                else:
                    raise CBORDecodeError(
                        b'unhandled special state: %d' % special
                    )

            # This value becomes an element of the current array.
            elif self._state == self._STATE_WANT_ARRAY_VALUE:
                # Simple values get appended.
                if special == SPECIAL_NONE:
                    c = self._collectionstack[-1]
                    c[b'v'].append(value)
                    c[b'remaining'] -= 1

                    # self._state doesn't need changed.

                # An array nested within an array.
                elif special == SPECIAL_START_ARRAY:
                    lastc = self._collectionstack[-1]
                    newvalue = []

                    # The nested collection is attached to its parent
                    # immediately and counts as one parent element; it is
                    # then filled in place via the stack entry pushed below.
                    lastc[b'v'].append(newvalue)
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {
                            b'remaining': value,
                            b'v': newvalue,
                        }
                    )

                    # self._state doesn't need changed.

                # A map nested within an array.
                elif special == SPECIAL_START_MAP:
                    lastc = self._collectionstack[-1]
                    newvalue = {}

                    lastc[b'v'].append(newvalue)
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {b'remaining': value, b'v': newvalue}
                    )

                    self._state = self._STATE_WANT_MAP_KEY

                # A set nested within an array.
                elif special == SPECIAL_START_SET:
                    lastc = self._collectionstack[-1]
                    newvalue = set()

                    lastc[b'v'].append(newvalue)
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {
                            b'remaining': value,
                            b'v': newvalue,
                        }
                    )

                    self._state = self._STATE_WANT_SET_VALUE

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError(
                        b'indefinite length bytestrings '
                        b'not allowed as array values'
                    )

                else:
                    raise CBORDecodeError(
                        b'unhandled special item when '
                        b'expecting array value: %d' % special
                    )

            # This value becomes the key of the current map instance.
            elif self._state == self._STATE_WANT_MAP_KEY:
                # The key is only remembered here; the map's ``b'remaining'``
                # count is decremented once the matching value arrives.
                if special == SPECIAL_NONE:
                    self._currentmapkey = value
                    self._state = self._STATE_WANT_MAP_VALUE

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError(
                        b'indefinite length bytestrings '
                        b'not allowed as map keys'
                    )

                elif special in (
                    SPECIAL_START_ARRAY,
                    SPECIAL_START_MAP,
                    SPECIAL_START_SET,
                ):
                    raise CBORDecodeError(
                        b'collections not supported as map keys'
                    )

                # We do not allow special values to be used as map keys.
                else:
                    raise CBORDecodeError(
                        b'unhandled special item when '
                        b'expecting map key: %d' % special
                    )

            # This value becomes the value of the current map key.
            elif self._state == self._STATE_WANT_MAP_VALUE:
                # Simple values simply get inserted into the map.
                if special == SPECIAL_NONE:
                    lastc = self._collectionstack[-1]
                    lastc[b'v'][self._currentmapkey] = value
                    lastc[b'remaining'] -= 1

                    self._state = self._STATE_WANT_MAP_KEY

                # A new array is used as the map value.
                elif special == SPECIAL_START_ARRAY:
                    lastc = self._collectionstack[-1]
                    newvalue = []

                    lastc[b'v'][self._currentmapkey] = newvalue
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {
                            b'remaining': value,
                            b'v': newvalue,
                        }
                    )

                    self._state = self._STATE_WANT_ARRAY_VALUE

                # A new map is used as the map value.
                elif special == SPECIAL_START_MAP:
                    lastc = self._collectionstack[-1]
                    newvalue = {}

                    lastc[b'v'][self._currentmapkey] = newvalue
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {
                            b'remaining': value,
                            b'v': newvalue,
                        }
                    )

                    self._state = self._STATE_WANT_MAP_KEY

                # A new set is used as the map value.
                elif special == SPECIAL_START_SET:
                    lastc = self._collectionstack[-1]
                    newvalue = set()

                    lastc[b'v'][self._currentmapkey] = newvalue
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {
                            b'remaining': value,
                            b'v': newvalue,
                        }
                    )

                    self._state = self._STATE_WANT_SET_VALUE

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError(
                        b'indefinite length bytestrings not '
                        b'allowed as map values'
                    )

                else:
                    raise CBORDecodeError(
                        b'unhandled special item when '
                        b'expecting map value: %d' % special
                    )

                # The key has been paired with its value; forget it.
                self._currentmapkey = None

            # This value is added to the current set.
            elif self._state == self._STATE_WANT_SET_VALUE:
                if special == SPECIAL_NONE:
                    lastc = self._collectionstack[-1]
                    lastc[b'v'].add(value)
                    lastc[b'remaining'] -= 1

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError(
                        b'indefinite length bytestrings not '
                        b'allowed as set values'
                    )

                elif special in (
                    SPECIAL_START_ARRAY,
                    SPECIAL_START_MAP,
                    SPECIAL_START_SET,
                ):
                    raise CBORDecodeError(
                        b'collections not allowed as set values'
                    )

                # We don't allow non-trivial types to exist as set values.
                else:
                    raise CBORDecodeError(
                        b'unhandled special item when '
                        b'expecting set value: %d' % special
                    )

            # This value represents the first chunk in an indefinite length
            # bytestring.
            elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
                # We received a full chunk.
                if special == SPECIAL_NONE:
                    self._decodedvalues.append(
                        bytestringchunk(value, first=True)
                    )

                    self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT

                # The end of stream marker. This means it is an empty
                # indefinite length bytestring.
                elif special == SPECIAL_INDEFINITE_BREAK:
                    # We /could/ convert this to a b''. But we want to preserve
                    # the nature of the underlying data so consumers expecting
                    # an indefinite length bytestring get one.
                    self._decodedvalues.append(
                        bytestringchunk(b'', first=True, last=True)
                    )

                    # Since indefinite length bytestrings can't be used in
                    # collections, we must be at the root level.
                    assert not self._collectionstack
                    self._state = self._STATE_NONE

                else:
                    raise CBORDecodeError(
                        b'unexpected special value when '
                        b'expecting bytestring chunk: %d' % special
                    )

            # This value represents the non-initial chunk in an indefinite
            # length bytestring.
            elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
                # We received a full chunk.
                if special == SPECIAL_NONE:
                    self._decodedvalues.append(bytestringchunk(value))

                # The end of stream marker. It is emitted as an empty chunk
                # flagged ``last`` so consumers can detect the end.
                elif special == SPECIAL_INDEFINITE_BREAK:
                    self._decodedvalues.append(bytestringchunk(b'', last=True))

                    # Since indefinite length bytestrings can't be used in
                    # collections, we must be at the root level.
                    assert not self._collectionstack
                    self._state = self._STATE_NONE

                else:
                    raise CBORDecodeError(
                        b'unexpected special value when '
                        b'expecting bytestring chunk: %d' % special
                    )

            else:
                raise CBORDecodeError(
                    b'unhandled decoder state: %d' % self._state
                )

            # We could have just added the final value in a collection. End
            # all complete collections at the top of the stack.
            while True:
                # Bail if we're not waiting on a new collection item.
                if self._state not in (
                    self._STATE_WANT_ARRAY_VALUE,
                    self._STATE_WANT_MAP_KEY,
                    self._STATE_WANT_SET_VALUE,
                ):
                    break

                # Or we are expecting more items for this collection.
                lastc = self._collectionstack[-1]

                if lastc[b'remaining']:
                    break

                # The collection at the top of the stack is complete.

                # Discard it, as it isn't needed for future items.
                self._collectionstack.pop()

                # If this is a nested collection, we don't emit it, since it
                # will be emitted by its parent collection. But we do need to
                # update state to reflect what the new top-most collection
                # on the stack is.
                if self._collectionstack:
                    # The resumed state is derived from the Python type of
                    # the parent's container.
                    self._state = {
                        list: self._STATE_WANT_ARRAY_VALUE,
                        dict: self._STATE_WANT_MAP_KEY,
                        set: self._STATE_WANT_SET_VALUE,
                    }[type(self._collectionstack[-1][b'v'])]

                # If this is the root collection, emit it.
                else:
                    self._decodedvalues.append(lastc[b'v'])
                    self._state = self._STATE_NONE

        # All input from ``offset`` onward was consumed, so no specific
        # number of additional bytes is requested.
        return (
            bool(self._decodedvalues),
            offset - initialoffset,
            0,
        )

    def getavailable(self):
        """Returns a list of the fully decoded values.

        Once values are retrieved, they won't be available on the next call.
        """

        # Hand back a copy and start a fresh buffer for later decodes.
        l = list(self._decodedvalues)
        self._decodedvalues = []
        return l

986

977

987

978

988

class bufferingdecoder(object):
    """A CBOR decoder that buffers undecoded input.

    This is a glorified wrapper around ``sansiodecoder`` that adds a buffering
    layer. All input that isn't consumed by ``sansiodecoder`` will be buffered
    and concatenated with any new input that arrives later.

    TODO consider adding limits as to the maximum amount of data that can
    be buffered.
    """

    def __init__(self):
        self._decoder = sansiodecoder()
        # Input chunks received but not yet consumed by the wrapped decoder.
        self._chunks = []
        # Number of additional bytes the wrapped decoder last asked for.
        self._wanted = 0

    def decode(self, b):
        """Attempt to decode bytes to CBOR values.

        Returns a tuple with the following fields:

        * Bool indicating whether new values are available for retrieval.
        * Integer number of bytes decoded from the new input. This is 0
          when the input was only buffered without attempting a decode.
        * Integer number of bytes wanted to decode the next value.
        """
        # We /might/ be able to support passing a bytearray all the
        # way through. For now, let's cheat.
        if isinstance(b, bytearray):
            b = bytes(b)

        # Our strategy for buffering is to aggregate the incoming chunks in a
        # list until we've received enough data to decode the next item.
        # This is slightly more complicated than using an ``io.BytesIO``
        # or continuously concatenating incoming data. However, because it
        # isn't constantly reallocating backing memory for a growing buffer,
        # it prevents excessive memory thrashing and is significantly faster,
        # especially in cases where the percentage of input chunks that don't
        # decode into a full item is high.

        if self._chunks:
            # A previous call said we needed N bytes to decode the next item.
            # But this call doesn't provide enough data. We buffer the incoming
            # chunk without attempting to decode.
            if len(b) < self._wanted:
                self._chunks.append(b)
                self._wanted -= len(b)
                return False, 0, self._wanted

            # Else we may have enough data to decode the next item. Aggregate
            # old data with new and reset the buffer.
            newlen = len(b)
            self._chunks.append(b)
            b = b''.join(self._chunks)
            self._chunks = []
            oldlen = len(b) - newlen

        else:
            oldlen = 0

        available, readcount, wanted = self._decoder.decode(b)
        self._wanted = wanted

        # Keep whatever the wrapped decoder did not consume for the next call.
        if readcount < len(b):
            self._chunks.append(b[readcount:])

        # Previously buffered bytes are not counted as part of the new input.
        return available, readcount - oldlen, wanted

    def getavailable(self):
        """Return the decoded values held by the wrapped decoder."""
        return self._decoder.getavailable()

1057

1048

1058

1049

1059

def decodeall(b):
    """Decode all CBOR items present in an iterable of bytes.

    Returns the list of decoded values; empty input yields an empty list.

    In addition to regular decode errors, raises CBORDecodeError if the
    entirety of the passed buffer does not fully decode to complete CBOR
    values. This includes failure to decode any value, incomplete collection
    types, incomplete indefinite length items, and extra data at the end of
    the buffer.
    """
    if not b:
        return []

    decoder = sansiodecoder()

    havevalues, readcount, wantbytes = decoder.decode(b)

    # Anything left unread means the buffer ended in the middle of an item.
    if readcount != len(b):
        raise CBORDecodeError(b'input data not fully consumed')

    # All bytes were read but a collection or indefinite item is still open.
    if decoder.inprogress:
        raise CBORDecodeError(b'input data not complete')

    return decoder.getavailable()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # cborutil.py - CBOR extensions
             #
             # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import struct
-            import sys
             # Very short version of RFC 7049...
             #
             # Each item begins with a byte. The 3 high bits of that byte denote the
             # "major type." The lower 5 bits denote the "subtype." Each major type
             # has its own encoding mechanism.
             #
             # Most types have lengths. However, bytestring, string, array, and map
             # can be indefinite length. These are denoted by a subtype with value 31.
             # Sub-components of those types then come afterwards and are terminated
             # by a "break" byte.
             # Major types: the value held in the 3 high bits of an item's
             # initial byte.
             MAJOR_TYPE_UINT = 0
             MAJOR_TYPE_NEGINT = 1
             MAJOR_TYPE_BYTESTRING = 2
             MAJOR_TYPE_STRING = 3
             MAJOR_TYPE_ARRAY = 4
             MAJOR_TYPE_MAP = 5
             MAJOR_TYPE_SEMANTIC = 6
             MAJOR_TYPE_SPECIAL = 7
             # Mask selecting the lower 5 bits (the subtype) of the initial byte.
             SUBTYPE_MASK = 0b00011111
             # Subtype values; decodeitem() checks FALSE, TRUE, NULL and
             # INDEFINITE under MAJOR_TYPE_SPECIAL and rejects every other
             # subtype there, including the float ones.
             SUBTYPE_FALSE = 20
             SUBTYPE_TRUE = 21
             SUBTYPE_NULL = 22
             SUBTYPE_HALF_FLOAT = 25
             SUBTYPE_SINGLE_FLOAT = 26
             SUBTYPE_DOUBLE_FLOAT = 27
             SUBTYPE_INDEFINITE = 31
             # Semantic tag value used to mark an array as a finite set.
             SEMANTIC_TAG_FINITE_SET = 258
             # Indefinite types begin with their major type ORd with information value 31.
             BEGIN_INDEFINITE_BYTESTRING = struct.pack(
                 '>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE
             )
             BEGIN_INDEFINITE_ARRAY = struct.pack(
                 '>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE
             )
             BEGIN_INDEFINITE_MAP = struct.pack(
                 '>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE
             )
             # Packers used by encodelength(): the initial byte alone, or the
             # initial byte followed by a 1, 2, 4 or 8 byte big-endian unsigned
             # integer.
             ENCODED_LENGTH_1 = struct.Struct('>B')
             ENCODED_LENGTH_2 = struct.Struct('>BB')
             ENCODED_LENGTH_3 = struct.Struct('>BH')
             ENCODED_LENGTH_4 = struct.Struct('>BL')
             ENCODED_LENGTH_5 = struct.Struct('>BQ')
             # The break ends an indefinite length item.
             BREAK = b'\xff'
             # Integer value of the BREAK byte.
             BREAK_INT = 255
             def encodelength(majortype, length):
                 """Build the item header for ``majortype`` carrying ``length``.

                 The major type is shifted into the high bits of the initial byte.
                 Lengths below 24 are stored inline in the low bits; larger ones put
                 24, 25, 26 or 27 there and follow as a 1, 2, 4 or 8 byte big-endian
                 unsigned integer.
                 """
                 prefix = majortype << 5
                 if length < 24:
                     return ENCODED_LENGTH_1.pack(prefix | length)
                 if length < 256:
                     return ENCODED_LENGTH_2.pack(prefix | 24, length)
                 if length < 65536:
                     return ENCODED_LENGTH_3.pack(prefix | 25, length)
                 if length < 4294967296:
                     return ENCODED_LENGTH_4.pack(prefix | 26, length)
                 return ENCODED_LENGTH_5.pack(prefix | 27, length)
             def streamencodebytestring(v):
                 """Emit a definite length bytestring: its length header, then ``v``."""
                 header = encodelength(MAJOR_TYPE_BYTESTRING, len(v))
                 yield header
                 yield v
             def streamencodebytestringfromiter(it):
                 """Encode an iterable of chunks as one indefinite length bytestring.

                 Every element of ``it`` must be representable as bytes. Each one is
                 emitted as its own definite length bytestring between the indefinite
                 start marker and the terminating break.
                 """
                 yield BEGIN_INDEFINITE_BYTESTRING
                 for piece in it:
                     # Length header followed by the chunk itself.
                     yield from streamencodebytestring(piece)
                 yield BREAK
             def streamencodeindefinitebytestring(source, chunksize=65536):
                 """Split a large buffer into an indefinite length bytestring.

                 This is a generator of the encoded pieces: the indefinite start
                 marker, then a length header and payload for every slice of at most
                 ``chunksize`` bytes, then the break. An empty ``source`` still
                 produces a single empty chunk.
                 """
                 yield BEGIN_INDEFINITE_BYTESTRING
                 position = 0
                 total = len(source)
                 while True:
                     piece = source[position : position + chunksize]
                     position += len(piece)
                     yield encodelength(MAJOR_TYPE_BYTESTRING, len(piece))
                     yield piece
                     # Checked after emitting so that at least one chunk is sent.
                     if position >= total:
                         break
                 yield BREAK
             def streamencodeint(v):
                 """Encode an integer as a CBOR uint or negint header.

                 Raises ValueError when ``v`` is at or above 2**64 or below -2**64.
                 """
                 limit = 18446744073709551616  # 2**64
                 if v >= limit or v < -limit:
                     raise ValueError(b'big integers not supported')
                 if v >= 0:
                     majortype, magnitude = MAJOR_TYPE_UINT, v
                 else:
                     # Negative integers store abs(v) - 1.
                     majortype, magnitude = MAJOR_TYPE_NEGINT, abs(v) - 1
                 yield encodelength(majortype, magnitude)
             def streamencodearray(l):
                 """Encode a sized iterable as a definite length array."""
                 yield encodelength(MAJOR_TYPE_ARRAY, len(l))
                 for item in l:
                     yield from streamencode(item)
             def streamencodearrayfromiter(it):
                 """Encode the items of an iterator as an indefinite length array."""
                 yield BEGIN_INDEFINITE_ARRAY
                 for item in it:
                     yield from streamencode(item)
                 yield BREAK
             def _mixedtypesortkey(v):
                 return type(v).__name__, v
             def streamencodeset(s):
                 """Encode a set as semantic tag 258 wrapping a sorted array."""
                 # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
                 # semantic tag 258 for finite sets.
                 yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)
                 # Members are sorted so the output is deterministic.
                 members = sorted(s, key=_mixedtypesortkey)
                 yield from streamencodearray(members)
             def streamencodemap(d):
                 """Encode a dictionary as a definite length map.

                 Entries are emitted sorted by key. Indefinite length maps are not
                 produced here.
                 """
                 yield encodelength(MAJOR_TYPE_MAP, len(d))

                 def bykey(entry):
                     return _mixedtypesortkey(entry[0])

                 for key, value in sorted(d.items(), key=bykey):
                     yield from streamencode(key)
                     yield from streamencode(value)
             def streamencodemapfromiter(it):
                 """Encode an iterable of (key, value) pairs as an indefinite length map."""
                 yield BEGIN_INDEFINITE_MAP
                 for key, value in it:
                     yield from streamencode(key)
                     yield from streamencode(value)
                 yield BREAK
             def streamencodebool(b):
                 """Emit the single byte encoding true or false."""
                 # major type 7, simple value 20 and 21.
                 if b:
                     yield b'\xf5'
                 else:
                     yield b'\xf4'
             def streamencodenone(v):
                 """Emit the single byte encoding null; ``v`` itself is not inspected."""
                 # major type 7, simple value 22.
                 yield b'\xf6'
             # Maps a Python type to the generator function that encodes it.
             # streamencode() looks up the exact class first and only then falls
             # back to an isinstance() scan over these keys in insertion order.
             STREAM_ENCODERS = {
                 bytes: streamencodebytestring,
                 int: streamencodeint,
                 list: streamencodearray,
                 tuple: streamencodearray,
                 dict: streamencodemap,
                 set: streamencodeset,
                 bool: streamencodebool,
                 type(None): streamencodenone,
             }
             def streamencode(v):
                 """Encode a value in a streaming manner.

                 Given an input object, encode it to CBOR recursively.

                 Returns a generator of CBOR encoded bytes. There is no guarantee
                 that each emitted chunk fully decodes to a value or sub-value.

                 Encoding is deterministic - unordered collections are sorted.

                 Raises ValueError if no encoder is registered for the type of ``v``
                 or for any type it is an instance of.
                 """
                 fn = STREAM_ENCODERS.get(v.__class__)
                 if not fn:
                     # handle subtypes such as encoding.localstr and util.sortdict
                     for ty in STREAM_ENCODERS:
                         if not isinstance(v, ty):
                             continue
                         fn = STREAM_ENCODERS[ty]
                         break
                 if not fn:
                     # ``%s`` in a bytes format only accepts bytes-like operands, so
                     # formatting a type object with it raised TypeError and masked
                     # the intended ValueError. ``%r`` inserts the ASCII repr.
                     raise ValueError(b'do not know how to encode %r' % type(v))
                 return fn(v)
             class CBORDecodeError(Exception):
                 """Raised when input cannot be decoded as supported CBOR."""
-            if sys.version_info.major >= 3:
+            def _elementtointeger(b, i):
+                return b[i]
-                def _elementtointeger(b, i):
-                    return b[i]
-            else:
-                def _elementtointeger(b, i):
-                    return ord(b[i])
             # Big-endian unsigned integer unpackers; decodeuint() selects one of
             # them for subtypes 24, 25, 26 and 27 respectively.
             STRUCT_BIG_UBYTE = struct.Struct('>B')
             STRUCT_BIG_USHORT = struct.Struct(b'>H')
             STRUCT_BIG_ULONG = struct.Struct(b'>L')
             STRUCT_BIG_ULONGLONG = struct.Struct(b'>Q')
             # Values for the last element of the tuple returned by decodeitem(),
             # telling the caller whether the item was a plain value or the
             # start/end marker of a larger structure.
             SPECIAL_NONE = 0
             SPECIAL_START_INDEFINITE_BYTESTRING = 1
             SPECIAL_START_ARRAY = 2
             SPECIAL_START_MAP = 3
             SPECIAL_START_SET = 4
             SPECIAL_INDEFINITE_BREAK = 5
             def decodeitem(b, offset=0):
                 """Decode a new CBOR value from a buffer at offset.

                 This function attempts to decode up to one complete CBOR value
                 from ``b`` starting at offset ``offset``.

                 The beginning of a collection (such as an array, map, set, or
                 indefinite length bytestring) counts as a single value. For these
                 special cases, a state flag will indicate that a special value was seen.

                 When called, the function either returns a decoded value or gives
                 a hint as to how many more bytes are needed to do so. By calling
                 the function repeatedly given a stream of bytes, the caller can
                 build up the original values.

                 Returns a tuple with the following elements:

                 * Bool indicating whether a complete value was decoded.
                 * A decoded value if first value is True otherwise None. For the
                   start of an array, map or set this is the number of elements.
                 * Integer number of bytes. If positive, the number of bytes
                   read. If negative, the number of bytes we need to read to
                   decode this value or the next chunk in this value.
                 * One of the ``SPECIAL_*`` constants indicating special treatment
                   for this value. ``SPECIAL_NONE`` means this is a fully decoded
                   simple value (such as an integer or bool).

                 Raises CBORDecodeError for the string major type, for semantic tags
                 other than the finite set tag, and for unsupported special subtypes.
                 """
                 initial = _elementtointeger(b, offset)
                 # ``offset`` now points at the first byte after the initial byte.
                 offset += 1
                 majortype = initial >> 5
                 subtype = initial & SUBTYPE_MASK
                 if majortype == MAJOR_TYPE_UINT:
                     complete, value, readcount = decodeuint(subtype, b, offset)
                     if complete:
                         # The extra 1 accounts for the initial byte.
                         return True, value, readcount + 1, SPECIAL_NONE
                     else:
                         return False, None, readcount, SPECIAL_NONE
                 elif majortype == MAJOR_TYPE_NEGINT:
                     # Negative integers are the same as UINT except inverted minus 1.
                     complete, value, readcount = decodeuint(subtype, b, offset)
                     if complete:
                         return True, -value - 1, readcount + 1, SPECIAL_NONE
                     else:
                         return False, None, readcount, SPECIAL_NONE
                 elif majortype == MAJOR_TYPE_BYTESTRING:
                     # Beginning of bytestrings are treated as uints in order to
                     # decode their length, which may be indefinite.
                     complete, size, readcount = decodeuint(
                         subtype, b, offset, allowindefinite=True
                     )
                     # We don't know the size of the bytestring. It must be a definitive
                     # length since the indefinite subtype would be encoded in the initial
                     # byte.
                     if not complete:
                         return False, None, readcount, SPECIAL_NONE
                     # We know the length of the bytestring.
                     if size is not None:
                         # And the data is available in the buffer.
                         if offset + readcount + size <= len(b):
                             value = b[offset + readcount : offset + readcount + size]
                             return True, value, readcount + size + 1, SPECIAL_NONE
                         # And we need more data in order to return the bytestring.
                         else:
                             # Negative: the number of payload bytes still missing.
                             wanted = len(b) - offset - readcount - size
                             return False, None, wanted, SPECIAL_NONE
                     # It is an indefinite length bytestring.
                     else:
                         return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING
                 elif majortype == MAJOR_TYPE_STRING:
                     raise CBORDecodeError(b'string major type not supported')
                 elif majortype == MAJOR_TYPE_ARRAY:
                     # Beginning of arrays are treated as uints in order to decode their
                     # length. We don't allow indefinite length arrays.
                     complete, size, readcount = decodeuint(subtype, b, offset)
                     if complete:
                         return True, size, readcount + 1, SPECIAL_START_ARRAY
                     else:
                         return False, None, readcount, SPECIAL_NONE
                 elif majortype == MAJOR_TYPE_MAP:
                     # Beginning of maps are treated as uints in order to decode their
                     # number of elements. We don't allow indefinite length maps.
                     complete, size, readcount = decodeuint(subtype, b, offset)
                     if complete:
                         return True, size, readcount + 1, SPECIAL_START_MAP
                     else:
                         return False, None, readcount, SPECIAL_NONE
                 elif majortype == MAJOR_TYPE_SEMANTIC:
                     # Semantic tag value is read the same as a uint.
                     complete, tagvalue, readcount = decodeuint(subtype, b, offset)
                     if not complete:
                         return False, None, readcount, SPECIAL_NONE
                     # This behavior here is a little wonky. The main type being "decorated"
                     # by this semantic tag follows. A more robust parser would probably emit
                     # a special flag indicating this as a semantic tag and let the caller
                     # deal with the types that follow. But since we don't support many
                     # semantic tags, it is easier to deal with the special cases here and
                     # hide complexity from the caller. If we add support for more semantic
                     # tags, we should probably move semantic tag handling into the caller.
                     if tagvalue == SEMANTIC_TAG_FINITE_SET:
                         # The tagged item has not arrived yet; ask for one more byte.
                         if offset + readcount >= len(b):
                             return False, None, -1, SPECIAL_NONE
                         complete, size, readcount2, special = decodeitem(
                             b, offset + readcount
                         )
                         if not complete:
                             return False, None, readcount2, SPECIAL_NONE
                         if special != SPECIAL_START_ARRAY:
                             raise CBORDecodeError(
                                 b'expected array after finite set semantic tag'
                             )
                         return True, size, readcount + readcount2 + 1, SPECIAL_START_SET
                     else:
                         raise CBORDecodeError(b'semantic tag %d not allowed' % tagvalue)
                 elif majortype == MAJOR_TYPE_SPECIAL:
                     # Only specific values for the information field are allowed.
                     if subtype == SUBTYPE_FALSE:
                         return True, False, 1, SPECIAL_NONE
                     elif subtype == SUBTYPE_TRUE:
                         return True, True, 1, SPECIAL_NONE
                     elif subtype == SUBTYPE_NULL:
                         return True, None, 1, SPECIAL_NONE
                     elif subtype == SUBTYPE_INDEFINITE:
                         return True, None, 1, SPECIAL_INDEFINITE_BREAK
                     # If value is 24, subtype is in next byte.
                     else:
                         raise CBORDecodeError(b'special type %d not allowed' % subtype)
                 else:
                     # Not expected for a one-byte initial value, whose top 3 bits
                     # are always 0-7. Raise explicitly instead of asserting so the
                     # check survives ``python -O`` and never falls through to an
                     # implicit ``None`` return.
                     raise CBORDecodeError(b'unknown major type %d' % majortype)
             def decodeuint(subtype, b, offset=0, allowindefinite=False):
                 """Decode the unsigned integer that follows an item's initial byte.

                 ``subtype`` is the low 5 bits of the initial byte. ``b`` is the buffer
                 and ``offset`` the index of the first byte after that initial byte.
                 ``allowindefinite`` permits subtype 31, the indefinite length marker.

                 Returns a 3-tuple of (successful, value, count):

                 * ``successful`` is a bool telling whether decoding completed.
                 * ``value`` is the decoded integer, or None when decoding did not
                   complete or when subtype is 31 and ``allowindefinite`` is True.
                 * ``count`` is the number of additional bytes decoded when it is
                   zero or positive, and the number of additional bytes needed
                   (negated) when it is negative.

                 Raises CBORDecodeError for subtype 31 without ``allowindefinite``
                 and for subtypes 28 through 30.
                 """
                 # Small values are inline.
                 if subtype < 24:
                     return True, subtype, 0
                 # Indefinite length specifier.
                 if subtype == 31:
                     if not allowindefinite:
                         raise CBORDecodeError(b'indefinite length uint not allowed here')
                     return True, None, 0
                 if subtype >= 28:
                     raise CBORDecodeError(
                         b'unsupported subtype on integer type: %d' % subtype
                     )
                 # Pick the unpacker matching the width announced by the subtype.
                 if subtype == 24:
                     unpacker = STRUCT_BIG_UBYTE
                 elif subtype == 25:
                     unpacker = STRUCT_BIG_USHORT
                 elif subtype == 26:
                     unpacker = STRUCT_BIG_ULONG
                 elif subtype == 27:
                     unpacker = STRUCT_BIG_ULONGLONG
                 else:
                     raise CBORDecodeError(b'bounds condition checking violation')
                 remaining = len(b) - offset
                 if remaining < unpacker.size:
                     # Negative count: how many more bytes are required.
                     return False, None, remaining - unpacker.size
                 return True, unpacker.unpack_from(b, offset)[0], unpacker.size
             class bytestringchunk(bytes):
                 """One segment of an indefinite length bytestring.

                 Instances behave like ``bytes`` and additionally carry ``isfirst``
                 and ``islast`` attributes, set from the ``first`` and ``last``
                 constructor arguments, marking the chunk's position in the
                 bytestring.
                 """

                 def __new__(cls, v, first=False, last=False):
                     chunk = super().__new__(cls, v)
                     chunk.isfirst = first
                     chunk.islast = last
                     return chunk
             class sansiodecoder(object):
                 """A CBOR decoder that doesn't perform its own I/O.
                 To use, construct an instance and feed it segments containing
                 CBOR-encoded bytes via ``decode()``. The return value from ``decode()``
                 indicates whether a fully-decoded value is available, how many bytes
                 were consumed, and offers a hint as to how many bytes should be fed
                 in next time to decode the next value.
                 The decoder assumes it will decode N discrete CBOR values, not just
                 a single value. i.e. if the bytestream contains uints packed one after
                 the other, the decoder will decode them all, rather than just the initial
                 one.
                 When ``decode()`` indicates a value is available, call ``getavailable()``
                 to return all fully decoded values.
                 ``decode()`` can partially decode input. It is up to the caller to keep
                 track of what data was consumed and to pass unconsumed data in on the
                 next invocation.
                 The decoder decodes atomically at the *item* level. See ``decodeitem()``.
                 If an *item* cannot be fully decoded, the decoder won't record it as
                 partially consumed. Instead, the caller will be instructed to pass in
                 the initial bytes of this item on the next invocation. This does result
                 in some redundant parsing. But the overhead should be minimal.
                 This decoder only supports a subset of CBOR as required by Mercurial.
                 It lacks support for:
                 * Indefinite length arrays
                 * Indefinite length maps
                 * Use of indefinite length bytestrings as keys or values within
                   arrays, maps, or sets.
                 * Nested arrays, maps, or sets within sets
                 * Any semantic tag that isn't a mathematical finite set
                 * Floating point numbers
                 * Undefined special value
                 CBOR types are decoded to Python types as follows:
                 uint -> int
                 negint -> int
                 bytestring -> bytes
                 map -> dict
                 array -> list
                 True -> bool
                 False -> bool
                 null -> None
                 indefinite length bytestring chunk -> [bytestringchunk]
                 The only non-obvious mapping here is an indefinite length bytestring
                 to the ``bytestringchunk`` type. This is to facilitate streaming
                 indefinite length bytestrings out of the decoder and to differentiate
                 a regular bytestring from an indefinite length bytestring.
                 """
                 _STATE_NONE = 0
                 _STATE_WANT_MAP_KEY = 1
                 _STATE_WANT_MAP_VALUE = 2
                 _STATE_WANT_ARRAY_VALUE = 3
                 _STATE_WANT_SET_VALUE = 4
                 _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
                 _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6
                 def __init__(self):
                     # TODO add support for limiting size of bytestrings
                     # TODO add support for limiting number of keys / values in collections
                     # TODO add support for limiting size of buffered partial values
                     self.decodedbytecount = 0
                     self._state = self._STATE_NONE
                     # Stack of active nested collections. Each entry is a dict describing
                     # the collection.
                     self._collectionstack = []
                     # Fully decoded key to use for the current map.
                     self._currentmapkey = None
                     # Fully decoded values available for retrieval.
                     self._decodedvalues = []
                 @property
                 def inprogress(self):
                     """Whether the decoder has partially decoded a value."""
                     return self._state != self._STATE_NONE
                 def decode(self, b, offset=0):
                     """Attempt to decode bytes from an input buffer.

                     ``b`` is a collection of bytes and ``offset`` is the byte
                     offset within that buffer from which to begin reading data.

                     ``b`` must support ``len()`` and accessing bytes slices via
                     ``__getitem__``. Typically ``bytes`` instances are used.

                     Returns a tuple with the following fields:

                     * Bool indicating whether values are available for retrieval.
                     * Integer indicating the number of bytes that were fully consumed,
                       starting from ``offset``.
                     * Integer indicating the number of bytes that are desired for the
                       next call in order to decode an item. This is ``0`` when the
                       input was empty or was consumed up to its end.

                     Raises ``CBORDecodeError`` if an item arrives that is not valid
                     for the current decoder state (e.g. a collection used as a map
                     key or set value, or an indefinite length bytestring nested in
                     a collection).
                     """
                     if not b:
                         return bool(self._decodedvalues), 0, 0

                     initialoffset = offset

                     # We could easily split the body of this loop into a function. But
                     # Python performance is sensitive to function calls and collections
                     # are composed of many items. So leaving as a while loop could help
                     # with performance. One thing that may not help is the use of
                     # if..elif versus a lookup/dispatch table. There may be value
                     # in switching that.
                     while offset < len(b):
                         # Attempt to decode an item. This could be a whole value or a
                         # special value indicating an event, such as start or end of a
                         # collection or indefinite length type.
                         complete, value, readcount, special = decodeitem(b, offset)

                         if readcount > 0:
                             self.decodedbytecount += readcount

                         if not complete:
                             # For an incomplete item, readcount is negative and its
                             # magnitude is the number of additional bytes wanted.
                             assert readcount < 0
                             return (
                                 bool(self._decodedvalues),
                                 offset - initialoffset,
                                 -readcount,
                             )

                         offset += readcount

                         # No nested state. We either have a full value or beginning of a
                         # complex value to deal with.
                         if self._state == self._STATE_NONE:
                             # A normal value.
                             if special == SPECIAL_NONE:
                                 self._decodedvalues.append(value)

                             elif special == SPECIAL_START_ARRAY:
                                 self._collectionstack.append(
                                     {
                                         b'remaining': value,
                                         b'v': [],
                                     }
                                 )
                                 self._state = self._STATE_WANT_ARRAY_VALUE

                             elif special == SPECIAL_START_MAP:
                                 self._collectionstack.append(
                                     {
                                         b'remaining': value,
                                         b'v': {},
                                     }
                                 )
                                 self._state = self._STATE_WANT_MAP_KEY

                             elif special == SPECIAL_START_SET:
                                 self._collectionstack.append(
                                     {
                                         b'remaining': value,
                                         b'v': set(),
                                     }
                                 )
                                 self._state = self._STATE_WANT_SET_VALUE

                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST

                             else:
                                 raise CBORDecodeError(
                                     b'unhandled special state: %d' % special
                                 )

                         # This value becomes an element of the current array.
                         elif self._state == self._STATE_WANT_ARRAY_VALUE:
                             # Simple values get appended.
                             if special == SPECIAL_NONE:
                                 c = self._collectionstack[-1]
                                 c[b'v'].append(value)
                                 c[b'remaining'] -= 1

                                 # self._state doesn't need changed.

                             # An array nested within an array.
                             elif special == SPECIAL_START_ARRAY:
                                 lastc = self._collectionstack[-1]
                                 newvalue = []

                                 lastc[b'v'].append(newvalue)
                                 lastc[b'remaining'] -= 1

                                 self._collectionstack.append(
                                     {
                                         b'remaining': value,
                                         b'v': newvalue,
                                     }
                                 )

                                 # self._state doesn't need changed.

                             # A map nested within an array.
                             elif special == SPECIAL_START_MAP:
                                 lastc = self._collectionstack[-1]
                                 newvalue = {}

                                 lastc[b'v'].append(newvalue)
                                 lastc[b'remaining'] -= 1

                                 self._collectionstack.append(
                                     {b'remaining': value, b'v': newvalue}
                                 )

                                 self._state = self._STATE_WANT_MAP_KEY

                             # A set nested within an array.
                             elif special == SPECIAL_START_SET:
                                 lastc = self._collectionstack[-1]
                                 newvalue = set()

                                 lastc[b'v'].append(newvalue)
                                 lastc[b'remaining'] -= 1

                                 self._collectionstack.append(
                                     {
                                         b'remaining': value,
                                         b'v': newvalue,
                                     }
                                 )

                                 self._state = self._STATE_WANT_SET_VALUE

                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 raise CBORDecodeError(
                                     b'indefinite length bytestrings '
                                     b'not allowed as array values'
                                 )

                             else:
                                 raise CBORDecodeError(
                                     b'unhandled special item when '
                                     b'expecting array value: %d' % special
                                 )

                         # This value becomes the key of the current map instance.
                         elif self._state == self._STATE_WANT_MAP_KEY:
                             if special == SPECIAL_NONE:
                                 self._currentmapkey = value
                                 self._state = self._STATE_WANT_MAP_VALUE

                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 raise CBORDecodeError(
                                     b'indefinite length bytestrings '
                                     b'not allowed as map keys'
                                 )

                             elif special in (
                                 SPECIAL_START_ARRAY,
                                 SPECIAL_START_MAP,
                                 SPECIAL_START_SET,
                             ):
                                 raise CBORDecodeError(
                                     b'collections not supported as map keys'
                                 )

                             # We do not allow special values to be used as map keys.
                             else:
                                 raise CBORDecodeError(
                                     b'unhandled special item when '
                                     b'expecting map key: %d' % special
                                 )

                         # This value becomes the value of the current map key.
                         elif self._state == self._STATE_WANT_MAP_VALUE:
                             # Simple values simply get inserted into the map.
                             if special == SPECIAL_NONE:
                                 lastc = self._collectionstack[-1]
                                 lastc[b'v'][self._currentmapkey] = value
                                 lastc[b'remaining'] -= 1

                                 self._state = self._STATE_WANT_MAP_KEY

                             # A new array is used as the map value.
                             elif special == SPECIAL_START_ARRAY:
                                 lastc = self._collectionstack[-1]
                                 newvalue = []

                                 lastc[b'v'][self._currentmapkey] = newvalue
                                 lastc[b'remaining'] -= 1

                                 self._collectionstack.append(
                                     {
                                         b'remaining': value,
                                         b'v': newvalue,
                                     }
                                 )

                                 self._state = self._STATE_WANT_ARRAY_VALUE

                             # A new map is used as the map value.
                             elif special == SPECIAL_START_MAP:
                                 lastc = self._collectionstack[-1]
                                 newvalue = {}

                                 lastc[b'v'][self._currentmapkey] = newvalue
                                 lastc[b'remaining'] -= 1

                                 self._collectionstack.append(
                                     {
                                         b'remaining': value,
                                         b'v': newvalue,
                                     }
                                 )

                                 self._state = self._STATE_WANT_MAP_KEY

                             # A new set is used as the map value.
                             elif special == SPECIAL_START_SET:
                                 lastc = self._collectionstack[-1]
                                 newvalue = set()

                                 lastc[b'v'][self._currentmapkey] = newvalue
                                 lastc[b'remaining'] -= 1

                                 self._collectionstack.append(
                                     {
                                         b'remaining': value,
                                         b'v': newvalue,
                                     }
                                 )

                                 self._state = self._STATE_WANT_SET_VALUE

                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 raise CBORDecodeError(
                                     b'indefinite length bytestrings not '
                                     b'allowed as map values'
                                 )

                             else:
                                 raise CBORDecodeError(
                                     b'unhandled special item when '
                                     b'expecting map value: %d' % special
                                 )

                             # The key has been paired with its value (or nested
                             # collection), so forget it.
                             self._currentmapkey = None

                         # This value is added to the current set.
                         elif self._state == self._STATE_WANT_SET_VALUE:
                             if special == SPECIAL_NONE:
                                 lastc = self._collectionstack[-1]
                                 lastc[b'v'].add(value)
                                 lastc[b'remaining'] -= 1

                             elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                                 raise CBORDecodeError(
                                     b'indefinite length bytestrings not '
                                     b'allowed as set values'
                                 )

                             elif special in (
                                 SPECIAL_START_ARRAY,
                                 SPECIAL_START_MAP,
                                 SPECIAL_START_SET,
                             ):
                                 raise CBORDecodeError(
                                     b'collections not allowed as set values'
                                 )

                             # We don't allow non-trivial types to exist as set values.
                             else:
                                 raise CBORDecodeError(
                                     b'unhandled special item when '
                                     b'expecting set value: %d' % special
                                 )

                         # This value represents the first chunk in an indefinite length
                         # bytestring.
                         elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
                             # We received a full chunk.
                             if special == SPECIAL_NONE:
                                 self._decodedvalues.append(
                                     bytestringchunk(value, first=True)
                                 )

                                 self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT

                             # The end of stream marker. This means it is an empty
                             # indefinite length bytestring.
                             elif special == SPECIAL_INDEFINITE_BREAK:
                                 # We /could/ convert this to a b''. But we want to preserve
                                 # the nature of the underlying data so consumers expecting
                                 # an indefinite length bytestring get one.
                                 self._decodedvalues.append(
                                     bytestringchunk(b'', first=True, last=True)
                                 )

                                 # Since indefinite length bytestrings can't be used in
                                 # collections, we must be at the root level.
                                 assert not self._collectionstack
                                 self._state = self._STATE_NONE

                             else:
                                 raise CBORDecodeError(
                                     b'unexpected special value when '
                                     b'expecting bytestring chunk: %d' % special
                                 )

                         # This value represents the non-initial chunk in an indefinite
                         # length bytestring.
                         elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
                             # We received a full chunk.
                             if special == SPECIAL_NONE:
                                 self._decodedvalues.append(bytestringchunk(value))

                             # The end of stream marker.
                             elif special == SPECIAL_INDEFINITE_BREAK:
                                 self._decodedvalues.append(bytestringchunk(b'', last=True))

                                 # Since indefinite length bytestrings can't be used in
                                 # collections, we must be at the root level.
                                 assert not self._collectionstack
                                 self._state = self._STATE_NONE

                             else:
                                 raise CBORDecodeError(
                                     b'unexpected special value when '
                                     b'expecting bytestring chunk: %d' % special
                                 )

                         else:
                             raise CBORDecodeError(
                                 b'unhandled decoder state: %d' % self._state
                             )

                         # We could have just added the final value in a collection. End
                         # all complete collections at the top of the stack.
                         while True:
                             # Bail if we're not waiting on a new collection item.
                             if self._state not in (
                                 self._STATE_WANT_ARRAY_VALUE,
                                 self._STATE_WANT_MAP_KEY,
                                 self._STATE_WANT_SET_VALUE,
                             ):
                                 break

                             # Or we are expecting more items for this collection.
                             lastc = self._collectionstack[-1]

                             if lastc[b'remaining']:
                                 break

                             # The collection at the top of the stack is complete.

                             # Discard it, as it isn't needed for future items.
                             self._collectionstack.pop()

                             # If this is a nested collection, we don't emit it, since it
                             # will be emitted by its parent collection. But we do need to
                             # update state to reflect what the new top-most collection
                             # on the stack is.
                             if self._collectionstack:
                                 self._state = {
                                     list: self._STATE_WANT_ARRAY_VALUE,
                                     dict: self._STATE_WANT_MAP_KEY,
                                     set: self._STATE_WANT_SET_VALUE,
                                 }[type(self._collectionstack[-1][b'v'])]

                             # If this is the root collection, emit it.
                             else:
                                 self._decodedvalues.append(lastc[b'v'])
                                 self._state = self._STATE_NONE

                     # The loop ran to the end of the buffer, so no specific number
                     # of additional bytes is being requested.
                     return (
                         bool(self._decodedvalues),
                         offset - initialoffset,
                         0,
                     )
                 def getavailable(self):
                     """Return a list of the fully decoded values and clear the queue.

                     The returned list is a fresh copy. Values handed out here are
                     removed from the decoder, so a following call only yields values
                     decoded after this one.
                     """
                     drained, self._decodedvalues = list(self._decodedvalues), []
                     return drained
             class bufferingdecoder(object):
                 """CBOR decoder that holds on to input it could not yet decode.

                 This wraps a ``sansiodecoder`` and adds a buffering layer on top of
                 it: whatever bytes the wrapped decoder does not consume are retained
                 and prepended to the input supplied by a later ``decode()`` call.

                 TODO consider adding limits as to the maximum amount of data that can
                 be buffered.
                 """

                 def __init__(self):
                     # Bytes the wrapped decoder asked for on the previous call.
                     self._wanted = 0
                     # Unconsumed input, kept as a list of separate chunks.
                     self._chunks = []
                     self._decoder = sansiodecoder()

                 def decode(self, b):
                     """Feed ``b`` to the decoder, buffering whatever is left over.

                     Returns a tuple with the following fields:

                     * Bool indicating whether new values are available for retrieval.
                     * Integer number of bytes decoded from the new input.
                     * Integer number of bytes wanted to decode the next value.
                     """
                     # Passing a bytearray all the way through /might/ be possible.
                     # For now, take the easy route and convert it.
                     if isinstance(b, bytearray):
                         b = bytes(b)

                     # Pending input is accumulated in a list of chunks and only
                     # joined once enough data has arrived to attempt the next item.
                     # That is a bit more involved than an ``io.BytesIO`` or repeated
                     # concatenation, but it avoids reallocating an ever-growing
                     # buffer, which matters most when many incoming chunks do not
                     # complete an item.
                     carried = 0

                     if self._chunks:
                         incoming = len(b)
                         self._chunks.append(b)

                         # Still short of what the previous call asked for: keep
                         # buffering and don't bother the decoder yet.
                         if incoming < self._wanted:
                             self._wanted -= incoming
                             return False, 0, self._wanted

                         # There may be enough now. Merge old and new data and
                         # start over with an empty buffer.
                         b = b''.join(self._chunks)
                         self._chunks = []
                         carried = len(b) - incoming

                     available, consumed, wanted = self._decoder.decode(b)
                     self._wanted = wanted

                     # Hold on to whatever the decoder did not consume.
                     if consumed < len(b):
                         self._chunks.append(b[consumed:])

                     # Report consumption relative to the new input only.
                     return available, consumed - carried, wanted

                 def getavailable(self):
                     """Return the decoded values held by the wrapped decoder."""
                     return self._decoder.getavailable()
             def decodeall(b):
                 """Decode every CBOR item contained in ``b`` and return them as a list.

                 An empty (falsy) input yields an empty list.

                 Besides the usual decode errors, ``CBORDecodeError`` is raised when
                 the buffer as a whole does not decode into complete CBOR values: no
                 value could be decoded, a collection or indefinite length item is left
                 unfinished, or there is extra data at the end of the buffer.
                 """
                 if not b:
                     return []

                 dec = sansiodecoder()

                 # Only the consumed byte count of the result tuple matters here.
                 consumed = dec.decode(b)[1]

                 if consumed != len(b):
                     raise CBORDecodeError(b'input data not fully consumed')

                 if dec.inprogress:
                     raise CBORDecodeError(b'input data not complete')

                 return dec.getavailable()