upstream/mercurial-mirror Commit - r44811:c7eebdb1

1

# nodemap.py - nodemap related code and utilities

1

# nodemap.py - nodemap related code and utilities

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

from __future__ import absolute_import

9

from __future__ import absolute_import

10

11

import os

11

import os

12

import re

12

import re

13

import struct

13

import struct

14

15

from .. import (

15

from .. import (

16

error,

16

error,

17

node as nodemod,

17

node as nodemod,

18

util,

18

util,

19

)

19

)

20

21

22

class NodeMap(dict):

22

class NodeMap(dict):

23

def __missing__(self, x):

23

def __missing__(self, x):

24

raise error.RevlogError(b'unknown node: %s' % x)

24

raise error.RevlogError(b'unknown node: %s' % x)

25

26

27

def persisted_data(revlog):

27

def persisted_data(revlog):

28

"""read the nodemap for a revlog from disk"""

28

"""read the nodemap for a revlog from disk"""

29

if revlog.nodemap_file is None:

29

if revlog.nodemap_file is None:

30

return None

30

return None

31

pdata = revlog.opener.tryread(revlog.nodemap_file)

31

pdata = revlog.opener.tryread(revlog.nodemap_file)

32

if not pdata:

32

if not pdata:

33

return None

33

return None

34

offset = 0

34

offset = 0

35

(version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])

35

(version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])

36

if version != ONDISK_VERSION:

36

if version != ONDISK_VERSION:

37

return None

37

return None

38

offset += S_VERSION.size

38

offset += S_VERSION.size

39

headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])

39

headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])

40

uid_size, tip_rev, data_length, data_unused = headers

40

uid_size, tip_rev, data_length, data_unused = headers

41

offset += S_HEADER.size

41

offset += S_HEADER.size

42

docket = NodeMapDocket(pdata[offset : offset + uid_size])

42

docket = NodeMapDocket(pdata[offset : offset + uid_size])

43

docket.tip_rev = tip_rev

43

docket.tip_rev = tip_rev

44

docket.data_length = data_length

44

docket.data_length = data_length

45

docket.data_unused = data_unused

45

docket.data_unused = data_unused

46

47

filename = _rawdata_filepath(revlog, docket)

47

filename = _rawdata_filepath(revlog, docket)

48

~~return~~ ~~docket~~, revlog.opener.tryread(filename)

48

data = revlog.opener.tryread(filename)

49

if len(data) < data_length:

50

return None

51

elif len(data) > data_length:

52

data = data[:data_length]

53

return docket, data

49

54

50

55

51

def setup_persistent_nodemap(tr, revlog):

56

def setup_persistent_nodemap(tr, revlog):

52

"""Install whatever is needed transaction side to persist a nodemap on disk

57

"""Install whatever is needed transaction side to persist a nodemap on disk

53

58

54

(only actually persist the nodemap if this is relevant for this revlog)

59

(only actually persist the nodemap if this is relevant for this revlog)

55

"""

60

"""

56

if revlog._inline:

61

if revlog._inline:

57

return # inlined revlog are too small for this to be relevant

62

return # inlined revlog are too small for this to be relevant

58

if revlog.nodemap_file is None:

63

if revlog.nodemap_file is None:

59

return # we do not use persistent_nodemap on this revlog

64

return # we do not use persistent_nodemap on this revlog

60

callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file

65

callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file

61

if tr.hasfinalize(callback_id):

66

if tr.hasfinalize(callback_id):

62

return # no need to register again

67

return # no need to register again

63

tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))

68

tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))

64

69

65

70

66

def _persist_nodemap(tr, revlog):

71

def _persist_nodemap(tr, revlog):

67

"""Write nodemap data on disk for a given revlog

72

"""Write nodemap data on disk for a given revlog

68

"""

73

"""

69

if getattr(revlog, 'filteredrevs', ()):

74

if getattr(revlog, 'filteredrevs', ()):

70

raise error.ProgrammingError(

75

raise error.ProgrammingError(

71

"cannot persist nodemap of a filtered changelog"

76

"cannot persist nodemap of a filtered changelog"

72

)

77

)

73

if revlog.nodemap_file is None:

78

if revlog.nodemap_file is None:

74

msg = "calling persist nodemap on a revlog without the feature enableb"

79

msg = "calling persist nodemap on a revlog without the feature enableb"

75

raise error.ProgrammingError(msg)

80

raise error.ProgrammingError(msg)

76

81

77

can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")

82

can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")

78

ondisk_docket = revlog._nodemap_docket

83

ondisk_docket = revlog._nodemap_docket

79

84

80

data = None

85

data = None

81

# first attemp an incremental update of the data

86

# first attemp an incremental update of the data

82

if can_incremental and ondisk_docket is not None:

87

if can_incremental and ondisk_docket is not None:

83

target_docket = revlog._nodemap_docket.copy()

88

target_docket = revlog._nodemap_docket.copy()

84

(

89

(

85

src_docket,

90

src_docket,

86

data_changed_count,

91

data_changed_count,

87

data,

92

data,

88

) = revlog.index.nodemap_data_incremental()

93

) = revlog.index.nodemap_data_incremental()

89

if src_docket != target_docket:

94

if src_docket != target_docket:

90

data = None

95

data = None

91

else:

96

else:

92

datafile = _rawdata_filepath(revlog, target_docket)

97

datafile = _rawdata_filepath(revlog, target_docket)

93

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

98

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

94

# store vfs

99

# store vfs

95

with revlog.opener(datafile, b'r+') as fd:

100

with revlog.opener(datafile, b'r+') as fd:

96

fd.seek(target_docket.data_length)

101

fd.seek(target_docket.data_length)

97

fd.write(data)

102

fd.write(data)

98

target_docket.data_length += len(data)

103

target_docket.data_length += len(data)

99

target_docket.data_unused += data_changed_count

104

target_docket.data_unused += data_changed_count

100

105

101

if data is None:

106

if data is None:

102

# otherwise fallback to a full new export

107

# otherwise fallback to a full new export

103

target_docket = NodeMapDocket()

108

target_docket = NodeMapDocket()

104

datafile = _rawdata_filepath(revlog, target_docket)

109

datafile = _rawdata_filepath(revlog, target_docket)

105

if util.safehasattr(revlog.index, "nodemap_data_all"):

110

if util.safehasattr(revlog.index, "nodemap_data_all"):

106

data = revlog.index.nodemap_data_all()

111

data = revlog.index.nodemap_data_all()

107

else:

112

else:

108

data = persistent_data(revlog.index)

113

data = persistent_data(revlog.index)

109

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

114

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

110

# store vfs

115

# store vfs

111

with revlog.opener(datafile, b'w') as fd:

116

with revlog.opener(datafile, b'w') as fd:

112

fd.write(data)

117

fd.write(data)

113

target_docket.data_length = len(data)

118

target_docket.data_length = len(data)

114

target_docket.tip_rev = revlog.tiprev()

119

target_docket.tip_rev = revlog.tiprev()

115

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

120

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

116

# store vfs

121

# store vfs

117

with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:

122

with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:

118

fp.write(target_docket.serialize())

123

fp.write(target_docket.serialize())

119

revlog._nodemap_docket = target_docket

124

revlog._nodemap_docket = target_docket

120

# EXP-TODO: if the transaction abort, we should remove the new data and

125

# EXP-TODO: if the transaction abort, we should remove the new data and

121

# reinstall the old one.

126

# reinstall the old one.

122

127

123

# search for old index file in all cases, some older process might have

128

# search for old index file in all cases, some older process might have

124

# left one behind.

129

# left one behind.

125

olds = _other_rawdata_filepath(revlog, target_docket)

130

olds = _other_rawdata_filepath(revlog, target_docket)

126

if olds:

131

if olds:

127

realvfs = getattr(revlog, '_realopener', revlog.opener)

132

realvfs = getattr(revlog, '_realopener', revlog.opener)

128

133

129

def cleanup(tr):

134

def cleanup(tr):

130

for oldfile in olds:

135

for oldfile in olds:

131

realvfs.tryunlink(oldfile)

136

realvfs.tryunlink(oldfile)

132

137

133

callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file

138

callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file

134

tr.addpostclose(callback_id, cleanup)

139

tr.addpostclose(callback_id, cleanup)

135

140

136

141

137

### Nodemap docket file

142

### Nodemap docket file

138

#

143

#

139

# The nodemap data are stored on disk using 2 files:

144

# The nodemap data are stored on disk using 2 files:

140

#

145

#

141

# * a raw data files containing a persistent nodemap

146

# * a raw data files containing a persistent nodemap

142

# (see `Nodemap Trie` section)

147

# (see `Nodemap Trie` section)

143

#

148

#

144

# * a small "docket" file containing medatadata

149

# * a small "docket" file containing medatadata

145

#

150

#

146

# While the nodemap data can be multiple tens of megabytes, the "docket" is

151

# While the nodemap data can be multiple tens of megabytes, the "docket" is

147

# small, it is easy to update it automatically or to duplicated its content

152

# small, it is easy to update it automatically or to duplicated its content

148

# during a transaction.

153

# during a transaction.

149

#

154

#

150

# Multiple raw data can exist at the same time (The currently valid one and a

155

# Multiple raw data can exist at the same time (The currently valid one and a

151

# new one beind used by an in progress transaction). To accomodate this, the

156

# new one beind used by an in progress transaction). To accomodate this, the

152

# filename hosting the raw data has a variable parts. The exact filename is

157

# filename hosting the raw data has a variable parts. The exact filename is

153

# specified inside the "docket" file.

158

# specified inside the "docket" file.

154

#

159

#

155

# The docket file contains information to find, qualify and validate the raw

160

# The docket file contains information to find, qualify and validate the raw

156

# data. Its content is currently very light, but it will expand as the on disk

161

# data. Its content is currently very light, but it will expand as the on disk

157

# nodemap gains the necessary features to be used in production.

162

# nodemap gains the necessary features to be used in production.

158

163

159

# version 0 is experimental, no BC garantee, do no use outside of tests.

164

# version 0 is experimental, no BC garantee, do no use outside of tests.

160

ONDISK_VERSION = 0

165

ONDISK_VERSION = 0

161

S_VERSION = struct.Struct(">B")

166

S_VERSION = struct.Struct(">B")

162

S_HEADER = struct.Struct(">BQQQ")

167

S_HEADER = struct.Struct(">BQQQ")

163

168

164

ID_SIZE = 8

169

ID_SIZE = 8

165

170

166

171

167

def _make_uid():

172

def _make_uid():

168

"""return a new unique identifier.

173

"""return a new unique identifier.

169

174

170

The identifier is random and composed of ascii characters."""

175

The identifier is random and composed of ascii characters."""

171

return nodemod.hex(os.urandom(ID_SIZE))

176

return nodemod.hex(os.urandom(ID_SIZE))

172

177

173

178

174

class NodeMapDocket(object):

179

class NodeMapDocket(object):

175

"""metadata associated with persistent nodemap data

180

"""metadata associated with persistent nodemap data

176

181

177

The persistent data may come from disk or be on their way to disk.

182

The persistent data may come from disk or be on their way to disk.

178

"""

183

"""

179

184

180

def __init__(self, uid=None):

185

def __init__(self, uid=None):

181

if uid is None:

186

if uid is None:

182

uid = _make_uid()

187

uid = _make_uid()

183

self.uid = uid

188

self.uid = uid

184

self.tip_rev = None

189

self.tip_rev = None

185

self.data_length = None

190

self.data_length = None

186

self.data_unused = 0

191

self.data_unused = 0

187

192

188

def copy(self):

193

def copy(self):

189

new = NodeMapDocket(uid=self.uid)

194

new = NodeMapDocket(uid=self.uid)

190

new.tip_rev = self.tip_rev

195

new.tip_rev = self.tip_rev

191

new.data_length = self.data_length

196

new.data_length = self.data_length

192

new.data_unused = self.data_unused

197

new.data_unused = self.data_unused

193

return new

198

return new

194

199

195

def __cmp__(self, other):

200

def __cmp__(self, other):

196

if self.uid < other.uid:

201

if self.uid < other.uid:

197

return -1

202

return -1

198

if self.uid > other.uid:

203

if self.uid > other.uid:

199

return 1

204

return 1

200

elif self.data_length < other.data_length:

205

elif self.data_length < other.data_length:

201

return -1

206

return -1

202

elif self.data_length > other.data_length:

207

elif self.data_length > other.data_length:

203

return 1

208

return 1

204

return 0

209

return 0

205

210

206

def __eq__(self, other):

211

def __eq__(self, other):

207

return self.uid == other.uid and self.data_length == other.data_length

212

return self.uid == other.uid and self.data_length == other.data_length

208

213

209

def serialize(self):

214

def serialize(self):

210

"""return serialized bytes for a docket using the passed uid"""

215

"""return serialized bytes for a docket using the passed uid"""

211

data = []

216

data = []

212

data.append(S_VERSION.pack(ONDISK_VERSION))

217

data.append(S_VERSION.pack(ONDISK_VERSION))

213

headers = (

218

headers = (

214

len(self.uid),

219

len(self.uid),

215

self.tip_rev,

220

self.tip_rev,

216

self.data_length,

221

self.data_length,

217

self.data_unused,

222

self.data_unused,

218

)

223

)

219

data.append(S_HEADER.pack(*headers))

224

data.append(S_HEADER.pack(*headers))

220

data.append(self.uid)

225

data.append(self.uid)

221

return b''.join(data)

226

return b''.join(data)

222

227

223

228

224

def _rawdata_filepath(revlog, docket):

229

def _rawdata_filepath(revlog, docket):

225

"""The (vfs relative) nodemap's rawdata file for a given uid"""

230

"""The (vfs relative) nodemap's rawdata file for a given uid"""

226

prefix = revlog.nodemap_file[:-2]

231

prefix = revlog.nodemap_file[:-2]

227

return b"%s-%s.nd" % (prefix, docket.uid)

232

return b"%s-%s.nd" % (prefix, docket.uid)

228

233

229

234

230

def _other_rawdata_filepath(revlog, docket):

235

def _other_rawdata_filepath(revlog, docket):

231

prefix = revlog.nodemap_file[:-2]

236

prefix = revlog.nodemap_file[:-2]

232

pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)

237

pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)

233

new_file_path = _rawdata_filepath(revlog, docket)

238

new_file_path = _rawdata_filepath(revlog, docket)

234

new_file_name = revlog.opener.basename(new_file_path)

239

new_file_name = revlog.opener.basename(new_file_path)

235

dirpath = revlog.opener.dirname(new_file_path)

240

dirpath = revlog.opener.dirname(new_file_path)

236

others = []

241

others = []

237

for f in revlog.opener.listdir(dirpath):

242

for f in revlog.opener.listdir(dirpath):

238

if pattern.match(f) and f != new_file_name:

243

if pattern.match(f) and f != new_file_name:

239

others.append(f)

244

others.append(f)

240

return others

245

return others

241

246

242

247

243

### Nodemap Trie

248

### Nodemap Trie

244

#

249

#

245

# This is a simple reference implementation to compute and persist a nodemap

250

# This is a simple reference implementation to compute and persist a nodemap

246

# trie. This reference implementation is write only. The python version of this

251

# trie. This reference implementation is write only. The python version of this

247

# is not expected to be actually used, since it wont provide performance

252

# is not expected to be actually used, since it wont provide performance

248

# improvement over existing non-persistent C implementation.

253

# improvement over existing non-persistent C implementation.

249

#

254

#

250

# The nodemap is persisted as Trie using 4bits-address/16-entries block. each

255

# The nodemap is persisted as Trie using 4bits-address/16-entries block. each

251

# revision can be adressed using its node shortest prefix.

256

# revision can be adressed using its node shortest prefix.

252

#

257

#

253

# The trie is stored as a sequence of block. Each block contains 16 entries

258

# The trie is stored as a sequence of block. Each block contains 16 entries

254

# (signed 64bit integer, big endian). Each entry can be one of the following:

259

# (signed 64bit integer, big endian). Each entry can be one of the following:

255

#

260

#

256

# * value >= 0 -> index of sub-block

261

# * value >= 0 -> index of sub-block

257

# * value == -1 -> no value

262

# * value == -1 -> no value

258

# * value < -1 -> a revision value: rev = -(value+10)

263

# * value < -1 -> a revision value: rev = -(value+10)

259

#

264

#

260

# The implementation focus on simplicity, not on performance. A Rust

265

# The implementation focus on simplicity, not on performance. A Rust

261

# implementation should provide a efficient version of the same binary

266

# implementation should provide a efficient version of the same binary

262

# persistence. This reference python implementation is never meant to be

267

# persistence. This reference python implementation is never meant to be

263

# extensively use in production.

268

# extensively use in production.

264

269

265

270

266

def persistent_data(index):

271

def persistent_data(index):

267

"""return the persistent binary form for a nodemap for a given index

272

"""return the persistent binary form for a nodemap for a given index

268

"""

273

"""

269

trie = _build_trie(index)

274

trie = _build_trie(index)

270

return _persist_trie(trie)

275

return _persist_trie(trie)

271

276

272

277

273

def update_persistent_data(index, root, max_idx, last_rev):

278

def update_persistent_data(index, root, max_idx, last_rev):

274

"""return the incremental update for persistent nodemap from a given index

279

"""return the incremental update for persistent nodemap from a given index

275

"""

280

"""

276

changed_block, trie = _update_trie(index, root, last_rev)

281

changed_block, trie = _update_trie(index, root, last_rev)

277

return (

282

return (

278

changed_block * S_BLOCK.size,

283

changed_block * S_BLOCK.size,

279

_persist_trie(trie, existing_idx=max_idx),

284

_persist_trie(trie, existing_idx=max_idx),

280

)

285

)

281

286

282

287

283

S_BLOCK = struct.Struct(">" + ("l" * 16))

288

S_BLOCK = struct.Struct(">" + ("l" * 16))

284

289

285

NO_ENTRY = -1

290

NO_ENTRY = -1

286

# rev 0 need to be -2 because 0 is used by block, -1 is a special value.

291

# rev 0 need to be -2 because 0 is used by block, -1 is a special value.

287

REV_OFFSET = 2

292

REV_OFFSET = 2

288

293

289

294

290

def _transform_rev(rev):

295

def _transform_rev(rev):

291

"""Return the number used to represent the rev in the tree.

296

"""Return the number used to represent the rev in the tree.

292

297

293

(or retrieve a rev number from such representation)

298

(or retrieve a rev number from such representation)

294

299

295

Note that this is an involution, a function equal to its inverse (i.e.

300

Note that this is an involution, a function equal to its inverse (i.e.

296

which gives the identity when applied to itself).

301

which gives the identity when applied to itself).

297

"""

302

"""

298

return -(rev + REV_OFFSET)

303

return -(rev + REV_OFFSET)

299

304

300

305

301

def _to_int(hex_digit):

306

def _to_int(hex_digit):

302

"""turn an hexadecimal digit into a proper integer"""

307

"""turn an hexadecimal digit into a proper integer"""

303

return int(hex_digit, 16)

308

return int(hex_digit, 16)

304

309

305

310

306

class Block(dict):

311

class Block(dict):

307

"""represent a block of the Trie

312

"""represent a block of the Trie

308

313

309

contains up to 16 entry indexed from 0 to 15"""

314

contains up to 16 entry indexed from 0 to 15"""

310

315

311

def __init__(self):

316

def __init__(self):

312

super(Block, self).__init__()

317

super(Block, self).__init__()

313

# If this block exist on disk, here is its ID

318

# If this block exist on disk, here is its ID

314

self.ondisk_id = None

319

self.ondisk_id = None

315

320

316

def __iter__(self):

321

def __iter__(self):

317

return iter(self.get(i) for i in range(16))

322

return iter(self.get(i) for i in range(16))

318

323

319

324

320

def _build_trie(index):

325

def _build_trie(index):

321

"""build a nodemap trie

326

"""build a nodemap trie

322

327

323

The nodemap stores revision number for each unique prefix.

328

The nodemap stores revision number for each unique prefix.

324

329

325

Each block is a dictionary with keys in `[0, 15]`. Values are either

330

Each block is a dictionary with keys in `[0, 15]`. Values are either

326

another block or a revision number.

331

another block or a revision number.

327

"""

332

"""

328

root = Block()

333

root = Block()

329

for rev in range(len(index)):

334

for rev in range(len(index)):

330

hex = nodemod.hex(index[rev][7])

335

hex = nodemod.hex(index[rev][7])

331

_insert_into_block(index, 0, root, rev, hex)

336

_insert_into_block(index, 0, root, rev, hex)

332

return root

337

return root

333

338

334

339

335

def _update_trie(index, root, last_rev):

340

def _update_trie(index, root, last_rev):

336

"""consume"""

341

"""consume"""

337

changed = 0

342

changed = 0

338

for rev in range(last_rev + 1, len(index)):

343

for rev in range(last_rev + 1, len(index)):

339

hex = nodemod.hex(index[rev][7])

344

hex = nodemod.hex(index[rev][7])

340

changed += _insert_into_block(index, 0, root, rev, hex)

345

changed += _insert_into_block(index, 0, root, rev, hex)

341

return changed, root

346

return changed, root

342

347

343

348

344

def _insert_into_block(index, level, block, current_rev, current_hex):

349

def _insert_into_block(index, level, block, current_rev, current_hex):

345

"""insert a new revision in a block

350

"""insert a new revision in a block

346

351

347

index: the index we are adding revision for

352

index: the index we are adding revision for

348

level: the depth of the current block in the trie

353

level: the depth of the current block in the trie

349

block: the block currently being considered

354

block: the block currently being considered

350

current_rev: the revision number we are adding

355

current_rev: the revision number we are adding

351

current_hex: the hexadecimal representation of the of that revision

356

current_hex: the hexadecimal representation of the of that revision

352

"""

357

"""

353

changed = 1

358

changed = 1

354

if block.ondisk_id is not None:

359

if block.ondisk_id is not None:

355

block.ondisk_id = None

360

block.ondisk_id = None

356

hex_digit = _to_int(current_hex[level : level + 1])

361

hex_digit = _to_int(current_hex[level : level + 1])

357

entry = block.get(hex_digit)

362

entry = block.get(hex_digit)

358

if entry is None:

363

if entry is None:

359

# no entry, simply store the revision number

364

# no entry, simply store the revision number

360

block[hex_digit] = current_rev

365

block[hex_digit] = current_rev

361

elif isinstance(entry, dict):

366

elif isinstance(entry, dict):

362

# need to recurse to an underlying block

367

# need to recurse to an underlying block

363

changed += _insert_into_block(

368

changed += _insert_into_block(

364

index, level + 1, entry, current_rev, current_hex

369

index, level + 1, entry, current_rev, current_hex

365

)

370

)

366

else:

371

else:

367

# collision with a previously unique prefix, inserting new

372

# collision with a previously unique prefix, inserting new

368

# vertices to fit both entry.

373

# vertices to fit both entry.

369

other_hex = nodemod.hex(index[entry][7])

374

other_hex = nodemod.hex(index[entry][7])

370

other_rev = entry

375

other_rev = entry

371

new = Block()

376

new = Block()

372

block[hex_digit] = new

377

block[hex_digit] = new

373

_insert_into_block(index, level + 1, new, other_rev, other_hex)

378

_insert_into_block(index, level + 1, new, other_rev, other_hex)

374

_insert_into_block(index, level + 1, new, current_rev, current_hex)

379

_insert_into_block(index, level + 1, new, current_rev, current_hex)

375

return changed

380

return changed

376

381

377

382

378

def _persist_trie(root, existing_idx=None):

383

def _persist_trie(root, existing_idx=None):

379

"""turn a nodemap trie into persistent binary data

384

"""turn a nodemap trie into persistent binary data

380

385

381

See `_build_trie` for nodemap trie structure"""

386

See `_build_trie` for nodemap trie structure"""

382

block_map = {}

387

block_map = {}

383

if existing_idx is not None:

388

if existing_idx is not None:

384

base_idx = existing_idx + 1

389

base_idx = existing_idx + 1

385

else:

390

else:

386

base_idx = 0

391

base_idx = 0

387

chunks = []

392

chunks = []

388

for tn in _walk_trie(root):

393

for tn in _walk_trie(root):

389

if tn.ondisk_id is not None:

394

if tn.ondisk_id is not None:

390

block_map[id(tn)] = tn.ondisk_id

395

block_map[id(tn)] = tn.ondisk_id

391

else:

396

else:

392

block_map[id(tn)] = len(chunks) + base_idx

397

block_map[id(tn)] = len(chunks) + base_idx

393

chunks.append(_persist_block(tn, block_map))

398

chunks.append(_persist_block(tn, block_map))

394

return b''.join(chunks)

399

return b''.join(chunks)

395

400

396

401

397

def _walk_trie(block):

402

def _walk_trie(block):

398

"""yield all the block in a trie

403

"""yield all the block in a trie

399

404

400

Children blocks are always yield before their parent block.

405

Children blocks are always yield before their parent block.

401

"""

406

"""

402

for (_, item) in sorted(block.items()):

407

for (_, item) in sorted(block.items()):

403

if isinstance(item, dict):

408

if isinstance(item, dict):

404

for sub_block in _walk_trie(item):

409

for sub_block in _walk_trie(item):

405

yield sub_block

410

yield sub_block

406

yield block

411

yield block

407

412

408

413

409

def _persist_block(block_node, block_map):

414

def _persist_block(block_node, block_map):

410

"""produce persistent binary data for a single block

415

"""produce persistent binary data for a single block

411

416

412

Children block are assumed to be already persisted and present in

417

Children block are assumed to be already persisted and present in

413

block_map.

418

block_map.

414

"""

419

"""

415

data = tuple(_to_value(v, block_map) for v in block_node)

420

data = tuple(_to_value(v, block_map) for v in block_node)

416

return S_BLOCK.pack(*data)

421

return S_BLOCK.pack(*data)

417

422

418

423

419

def _to_value(item, block_map):

424

def _to_value(item, block_map):

420

"""persist any value as an integer"""

425

"""persist any value as an integer"""

421

if item is None:

426

if item is None:

422

return NO_ENTRY

427

return NO_ENTRY

423

elif isinstance(item, dict):

428

elif isinstance(item, dict):

424

return block_map[id(item)]

429

return block_map[id(item)]

425

else:

430

else:

426

return _transform_rev(item)

431

return _transform_rev(item)

427

432

428

433

429

def parse_data(data):

434

def parse_data(data):

430

"""parse parse nodemap data into a nodemap Trie"""

435

"""parse parse nodemap data into a nodemap Trie"""

431

if (len(data) % S_BLOCK.size) != 0:

436

if (len(data) % S_BLOCK.size) != 0:

432

msg = "nodemap data size is not a multiple of block size (%d): %d"

437

msg = "nodemap data size is not a multiple of block size (%d): %d"

433

raise error.Abort(msg % (S_BLOCK.size, len(data)))

438

raise error.Abort(msg % (S_BLOCK.size, len(data)))

434

if not data:

439

if not data:

435

return Block(), None

440

return Block(), None

436

block_map = {}

441

block_map = {}

437

new_blocks = []

442

new_blocks = []

438

for i in range(0, len(data), S_BLOCK.size):

443

for i in range(0, len(data), S_BLOCK.size):

439

block = Block()

444

block = Block()

440

block.ondisk_id = len(block_map)

445

block.ondisk_id = len(block_map)

441

block_map[block.ondisk_id] = block

446

block_map[block.ondisk_id] = block

442

block_data = data[i : i + S_BLOCK.size]

447

block_data = data[i : i + S_BLOCK.size]

443

values = S_BLOCK.unpack(block_data)

448

values = S_BLOCK.unpack(block_data)

444

new_blocks.append((block, values))

449

new_blocks.append((block, values))

445

for b, values in new_blocks:

450

for b, values in new_blocks:

446

for idx, v in enumerate(values):

451

for idx, v in enumerate(values):

447

if v == NO_ENTRY:

452

if v == NO_ENTRY:

448

continue

453

continue

449

elif v >= 0:

454

elif v >= 0:

450

b[idx] = block_map[v]

455

b[idx] = block_map[v]

451

else:

456

else:

452

b[idx] = _transform_rev(v)

457

b[idx] = _transform_rev(v)

453

return block, i // S_BLOCK.size

458

return block, i // S_BLOCK.size

454

459

455

460

456

# debug utility

461

# debug utility

457

462

458

463

459

def check_data(ui, index, data):

464

def check_data(ui, index, data):

460

"""verify that the provided nodemap data are valid for the given idex"""

465

"""verify that the provided nodemap data are valid for the given idex"""

461

ret = 0

466

ret = 0

462

ui.status((b"revision in index: %d\n") % len(index))

467

ui.status((b"revision in index: %d\n") % len(index))

463

root, __ = parse_data(data)

468

root, __ = parse_data(data)

464

all_revs = set(_all_revisions(root))

469

all_revs = set(_all_revisions(root))

465

ui.status((b"revision in nodemap: %d\n") % len(all_revs))

470

ui.status((b"revision in nodemap: %d\n") % len(all_revs))

466

for r in range(len(index)):

471

for r in range(len(index)):

467

if r not in all_revs:

472

if r not in all_revs:

468

msg = b" revision missing from nodemap: %d\n" % r

473

msg = b" revision missing from nodemap: %d\n" % r

469

ui.write_err(msg)

474

ui.write_err(msg)

470

ret = 1

475

ret = 1

471

else:

476

else:

472

all_revs.remove(r)

477

all_revs.remove(r)

473

nm_rev = _find_node(root, nodemod.hex(index[r][7]))

478

nm_rev = _find_node(root, nodemod.hex(index[r][7]))

474

if nm_rev is None:

479

if nm_rev is None:

475

msg = b" revision node does not match any entries: %d\n" % r

480

msg = b" revision node does not match any entries: %d\n" % r

476

ui.write_err(msg)

481

ui.write_err(msg)

477

ret = 1

482

ret = 1

478

elif nm_rev != r:

483

elif nm_rev != r:

479

msg = (

484

msg = (

480

b" revision node does not match the expected revision: "

485

b" revision node does not match the expected revision: "

481

b"%d != %d\n" % (r, nm_rev)

486

b"%d != %d\n" % (r, nm_rev)

482

)

487

)

483

ui.write_err(msg)

488

ui.write_err(msg)

484

ret = 1

489

ret = 1

485

490

486

if all_revs:

491

if all_revs:

487

for r in sorted(all_revs):

492

for r in sorted(all_revs):

488

msg = b" extra revision in nodemap: %d\n" % r

493

msg = b" extra revision in nodemap: %d\n" % r

489

ui.write_err(msg)

494

ui.write_err(msg)

490

ret = 1

495

ret = 1

491

return ret

496

return ret

492

497

493

498

494

def _all_revisions(root):

499

def _all_revisions(root):

495

"""return all revisions stored in a Trie"""

500

"""return all revisions stored in a Trie"""

496

for block in _walk_trie(root):

501

for block in _walk_trie(root):

497

for v in block:

502

for v in block:

498

if v is None or isinstance(v, Block):

503

if v is None or isinstance(v, Block):

499

continue

504

continue

500

yield v

505

yield v

501

506

502

507

503

def _find_node(block, node):

508

def _find_node(block, node):

504

"""find the revision associated with a given node"""

509

"""find the revision associated with a given node"""

505

entry = block.get(_to_int(node[0:1]))

510

entry = block.get(_to_int(node[0:1]))

506

if isinstance(entry, dict):

511

if isinstance(entry, dict):

507

return _find_node(entry, node[1:])

512

return _find_node(entry, node[1:])

508

return entry

513

return entry

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # nodemap.py - nodemap related code and utilities
             #
             # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
             # Copyright 2019 George Racinet <georges.racinet@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import os
             import re
             import struct
             from .. import (
                 error,
                 node as nodemod,
                 util,
             )
             class NodeMap(dict):
                 def __missing__(self, x):
                     raise error.RevlogError(b'unknown node: %s' % x)
             def persisted_data(revlog):
                 """read the nodemap for a revlog from disk"""
                 if revlog.nodemap_file is None:
                     return None
                 pdata = revlog.opener.tryread(revlog.nodemap_file)
                 if not pdata:
                     return None
                 offset = 0
                 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
                 if version != ONDISK_VERSION:
                     return None
                 offset += S_VERSION.size
                 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
                 uid_size, tip_rev, data_length, data_unused = headers
                 offset += S_HEADER.size
                 docket = NodeMapDocket(pdata[offset : offset + uid_size])
                 docket.tip_rev = tip_rev
                 docket.data_length = data_length
                 docket.data_unused = data_unused
                 filename = _rawdata_filepath(revlog, docket)
-                return docket, revlog.opener.tryread(filename)
+                data = revlog.opener.tryread(filename)
+                if len(data) < data_length:
+                    return None
+                elif len(data) > data_length:
+                    data = data[:data_length]
+                return docket, data
             def setup_persistent_nodemap(tr, revlog):
                 """Install whatever is needed transaction side to persist a nodemap on disk
                 (only actually persist the nodemap if this is relevant for this revlog)
                 """
                 if revlog._inline:
                     return  # inlined revlog are too small for this to be relevant
                 if revlog.nodemap_file is None:
                     return  # we do not use persistent_nodemap on this revlog
                 callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
                 if tr.hasfinalize(callback_id):
                     return  # no need to register again
                 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
             def _persist_nodemap(tr, revlog):
                 """Write nodemap data on disk for a given revlog
                 """
                 if getattr(revlog, 'filteredrevs', ()):
                     raise error.ProgrammingError(
                         "cannot persist nodemap of a filtered changelog"
                     )
                 if revlog.nodemap_file is None:
                     msg = "calling persist nodemap on a revlog without the feature enableb"
                     raise error.ProgrammingError(msg)
                 can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
                 ondisk_docket = revlog._nodemap_docket
                 data = None
                 # first attemp an incremental update of the data
                 if can_incremental and ondisk_docket is not None:
                     target_docket = revlog._nodemap_docket.copy()
                     (
                         src_docket,
                         data_changed_count,
                         data,
                     ) = revlog.index.nodemap_data_incremental()
                     if src_docket != target_docket:
                         data = None
                     else:
                         datafile = _rawdata_filepath(revlog, target_docket)
                         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                         # store vfs
                         with revlog.opener(datafile, b'r+') as fd:
                             fd.seek(target_docket.data_length)
                             fd.write(data)
                         target_docket.data_length += len(data)
                         target_docket.data_unused += data_changed_count
                 if data is None:
                     # otherwise fallback to a full new export
                     target_docket = NodeMapDocket()
                     datafile = _rawdata_filepath(revlog, target_docket)
                     if util.safehasattr(revlog.index, "nodemap_data_all"):
                         data = revlog.index.nodemap_data_all()
                     else:
                         data = persistent_data(revlog.index)
                     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                     # store vfs
                     with revlog.opener(datafile, b'w') as fd:
                         fd.write(data)
                     target_docket.data_length = len(data)
                 target_docket.tip_rev = revlog.tiprev()
                 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                 # store vfs
                 with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
                     fp.write(target_docket.serialize())
                 revlog._nodemap_docket = target_docket
                 # EXP-TODO: if the transaction abort, we should remove the new data and
                 # reinstall the old one.
                 # search for old index file in all cases, some older process might have
                 # left one behind.
                 olds = _other_rawdata_filepath(revlog, target_docket)
                 if olds:
                     realvfs = getattr(revlog, '_realopener', revlog.opener)
                     def cleanup(tr):
                         for oldfile in olds:
                             realvfs.tryunlink(oldfile)
                     callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
                     tr.addpostclose(callback_id, cleanup)
             ### Nodemap docket file
             #
             # The nodemap data are stored on disk using 2 files:
             #
             # * a raw data files containing a persistent nodemap
             #   (see `Nodemap Trie` section)
             #
             # * a small "docket" file containing medatadata
             #
             # While the nodemap data can be multiple tens of megabytes, the "docket" is
             # small, it is easy to update it automatically or to duplicated its content
             # during a transaction.
             #
             # Multiple raw data can exist at the same time (The currently valid one and a
             # new one beind used by an in progress transaction). To accomodate this, the
             # filename hosting the raw data has a variable parts. The exact filename is
             # specified inside the "docket" file.
             #
             # The docket file contains information to find, qualify and validate the raw
             # data. Its content is currently very light, but it will expand as the on disk
             # nodemap gains the necessary features to be used in production.
             # version 0 is experimental, no BC garantee, do no use outside of tests.
             ONDISK_VERSION = 0
             S_VERSION = struct.Struct(">B")
             S_HEADER = struct.Struct(">BQQQ")
             ID_SIZE = 8
             def _make_uid():
                 """return a new unique identifier.
                 The identifier is random and composed of ascii characters."""
                 return nodemod.hex(os.urandom(ID_SIZE))
             class NodeMapDocket(object):
                 """metadata associated with persistent nodemap data
                 The persistent data may come from disk or be on their way to disk.
                 """
                 def __init__(self, uid=None):
                     if uid is None:
                         uid = _make_uid()
                     self.uid = uid
                     self.tip_rev = None
                     self.data_length = None
                     self.data_unused = 0
                 def copy(self):
                     new = NodeMapDocket(uid=self.uid)
                     new.tip_rev = self.tip_rev
                     new.data_length = self.data_length
                     new.data_unused = self.data_unused
                     return new
                 def __cmp__(self, other):
                     if self.uid < other.uid:
                         return -1
                     if self.uid > other.uid:
                         return 1
                     elif self.data_length < other.data_length:
                         return -1
                     elif self.data_length > other.data_length:
                         return 1
                     return 0
                 def __eq__(self, other):
                     return self.uid == other.uid and self.data_length == other.data_length
                 def serialize(self):
                     """return serialized bytes for a docket using the passed uid"""
                     data = []
                     data.append(S_VERSION.pack(ONDISK_VERSION))
                     headers = (
                         len(self.uid),
                         self.tip_rev,
                         self.data_length,
                         self.data_unused,
                     )
                     data.append(S_HEADER.pack(*headers))
                     data.append(self.uid)
                     return b''.join(data)
             def _rawdata_filepath(revlog, docket):
                 """The (vfs relative) nodemap's rawdata file for a given uid"""
                 prefix = revlog.nodemap_file[:-2]
                 return b"%s-%s.nd" % (prefix, docket.uid)
             def _other_rawdata_filepath(revlog, docket):
                 prefix = revlog.nodemap_file[:-2]
                 pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
                 new_file_path = _rawdata_filepath(revlog, docket)
                 new_file_name = revlog.opener.basename(new_file_path)
                 dirpath = revlog.opener.dirname(new_file_path)
                 others = []
                 for f in revlog.opener.listdir(dirpath):
                     if pattern.match(f) and f != new_file_name:
                         others.append(f)
                 return others
             ### Nodemap Trie
             #
             # This is a simple reference implementation to compute and persist a nodemap
             # trie. This reference implementation is write only. The python version of this
             # is not expected to be actually used, since it wont provide performance
             # improvement over existing non-persistent C implementation.
             #
             # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
             # revision can be adressed using its node shortest prefix.
             #
             # The trie is stored as a sequence of block. Each block contains 16 entries
             # (signed 64bit integer, big endian). Each entry can be one of the following:
             #
             #  * value >=  0 -> index of sub-block
             #  * value == -1 -> no value
             #  * value <  -1 -> a revision value: rev = -(value+10)
             #
             # The implementation focus on simplicity, not on performance. A Rust
             # implementation should provide a efficient version of the same binary
             # persistence. This reference python implementation is never meant to be
             # extensively use in production.
             def persistent_data(index):
                 """return the persistent binary form for a nodemap for a given index
                 """
                 trie = _build_trie(index)
                 return _persist_trie(trie)
             def update_persistent_data(index, root, max_idx, last_rev):
                 """return the incremental update for persistent nodemap from a given index
                 """
                 changed_block, trie = _update_trie(index, root, last_rev)
                 return (
                     changed_block * S_BLOCK.size,
                     _persist_trie(trie, existing_idx=max_idx),
                 )
             S_BLOCK = struct.Struct(">" + ("l" * 16))
             NO_ENTRY = -1
             # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
             REV_OFFSET = 2
             def _transform_rev(rev):
                 """Return the number used to represent the rev in the tree.
                 (or retrieve a rev number from such representation)
                 Note that this is an involution, a function equal to its inverse (i.e.
                 which gives the identity when applied to itself).
                 """
                 return -(rev + REV_OFFSET)
             def _to_int(hex_digit):
                 """turn an hexadecimal digit into a proper integer"""
                 return int(hex_digit, 16)
             class Block(dict):
                 """represent a block of the Trie
                 contains up to 16 entry indexed from 0 to 15"""
                 def __init__(self):
                     super(Block, self).__init__()
                     # If this block exist on disk, here is its ID
                     self.ondisk_id = None
                 def __iter__(self):
                     return iter(self.get(i) for i in range(16))
             def _build_trie(index):
                 """build a nodemap trie
                 The nodemap stores revision number for each unique prefix.
                 Each block is a dictionary with keys in `[0, 15]`. Values are either
                 another block or a revision number.
                 """
                 root = Block()
                 for rev in range(len(index)):
                     hex = nodemod.hex(index[rev][7])
                     _insert_into_block(index, 0, root, rev, hex)
                 return root
             def _update_trie(index, root, last_rev):
                 """consume"""
                 changed = 0
                 for rev in range(last_rev + 1, len(index)):
                     hex = nodemod.hex(index[rev][7])
                     changed += _insert_into_block(index, 0, root, rev, hex)
                 return changed, root
             def _insert_into_block(index, level, block, current_rev, current_hex):
                 """insert a new revision in a block
                 index: the index we are adding revision for
                 level: the depth of the current block in the trie
                 block: the block currently being considered
                 current_rev: the revision number we are adding
                 current_hex: the hexadecimal representation of the of that revision
                 """
                 changed = 1
                 if block.ondisk_id is not None:
                     block.ondisk_id = None
                 hex_digit = _to_int(current_hex[level : level + 1])
                 entry = block.get(hex_digit)
                 if entry is None:
                     # no entry, simply store the revision number
                     block[hex_digit] = current_rev
                 elif isinstance(entry, dict):
                     # need to recurse to an underlying block
                     changed += _insert_into_block(
                         index, level + 1, entry, current_rev, current_hex
                     )
                 else:
                     # collision with a previously unique prefix, inserting new
                     # vertices to fit both entry.
                     other_hex = nodemod.hex(index[entry][7])
                     other_rev = entry
                     new = Block()
                     block[hex_digit] = new
                     _insert_into_block(index, level + 1, new, other_rev, other_hex)
                     _insert_into_block(index, level + 1, new, current_rev, current_hex)
                 return changed
             def _persist_trie(root, existing_idx=None):
                 """turn a nodemap trie into persistent binary data
                 See `_build_trie` for nodemap trie structure"""
                 block_map = {}
                 if existing_idx is not None:
                     base_idx = existing_idx + 1
                 else:
                     base_idx = 0
                 chunks = []
                 for tn in _walk_trie(root):
                     if tn.ondisk_id is not None:
                         block_map[id(tn)] = tn.ondisk_id
                     else:
                         block_map[id(tn)] = len(chunks) + base_idx
                         chunks.append(_persist_block(tn, block_map))
                 return b''.join(chunks)
             def _walk_trie(block):
                 """yield all the block in a trie
                 Children blocks are always yield before their parent block.
                 """
                 for (_, item) in sorted(block.items()):
                     if isinstance(item, dict):
                         for sub_block in _walk_trie(item):
                             yield sub_block
                 yield block
             def _persist_block(block_node, block_map):
                 """produce persistent binary data for a single block
                 Children block are assumed to be already persisted and present in
                 block_map.
                 """
                 data = tuple(_to_value(v, block_map) for v in block_node)
                 return S_BLOCK.pack(*data)
             def _to_value(item, block_map):
                 """persist any value as an integer"""
                 if item is None:
                     return NO_ENTRY
                 elif isinstance(item, dict):
                     return block_map[id(item)]
                 else:
                     return _transform_rev(item)
             def parse_data(data):
                 """parse parse nodemap data into a nodemap Trie"""
                 if (len(data) % S_BLOCK.size) != 0:
                     msg = "nodemap data size is not a multiple of block size (%d): %d"
                     raise error.Abort(msg % (S_BLOCK.size, len(data)))
                 if not data:
                     return Block(), None
                 block_map = {}
                 new_blocks = []
                 for i in range(0, len(data), S_BLOCK.size):
                     block = Block()
                     block.ondisk_id = len(block_map)
                     block_map[block.ondisk_id] = block
                     block_data = data[i : i + S_BLOCK.size]
                     values = S_BLOCK.unpack(block_data)
                     new_blocks.append((block, values))
                 for b, values in new_blocks:
                     for idx, v in enumerate(values):
                         if v == NO_ENTRY:
                             continue
                         elif v >= 0:
                             b[idx] = block_map[v]
                         else:
                             b[idx] = _transform_rev(v)
                 return block, i // S_BLOCK.size
             # debug utility
             def check_data(ui, index, data):
                 """verify that the provided nodemap data are valid for the given idex"""
                 ret = 0
                 ui.status((b"revision in index:   %d\n") % len(index))
                 root, __ = parse_data(data)
                 all_revs = set(_all_revisions(root))
                 ui.status((b"revision in nodemap: %d\n") % len(all_revs))
                 for r in range(len(index)):
                     if r not in all_revs:
                         msg = b"  revision missing from nodemap: %d\n" % r
                         ui.write_err(msg)
                         ret = 1
                     else:
                         all_revs.remove(r)
                     nm_rev = _find_node(root, nodemod.hex(index[r][7]))
                     if nm_rev is None:
                         msg = b"  revision node does not match any entries: %d\n" % r
                         ui.write_err(msg)
                         ret = 1
                     elif nm_rev != r:
                         msg = (
                             b"  revision node does not match the expected revision: "
                             b"%d != %d\n" % (r, nm_rev)
                         )
                         ui.write_err(msg)
                         ret = 1
                 if all_revs:
                     for r in sorted(all_revs):
                         msg = b"  extra revision in  nodemap: %d\n" % r
                         ui.write_err(msg)
                     ret = 1
                 return ret
             def _all_revisions(root):
                 """return all revisions stored in a Trie"""
                 for block in _walk_trie(root):
                     for v in block:
                         if v is None or isinstance(v, Block):
                             continue
                         yield v
             def _find_node(block, node):
                 """find the revision associated with a given node"""
                 entry = block.get(_to_int(node[0:1]))
                 if isinstance(entry, dict):
                     return _find_node(entry, node[1:])
                 return entry