nodemap: make sure the nodemap docket is updated after the changelog...
marmoute
r45004:448d700e default
@@ -1,598 +1,600 @@
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import os
import re
import struct

from .. import (
    error,
    node as nodemod,
    util,
)


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get("exp-persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                data = util.buffer(util.mmapread(fd, data_length))
            else:
                data = fd.read(data_length)
    except OSError as e:
        # a missing data file means there is no usable nodemap on disk
        if e.errno == errno.ENOENT:
            return None
        raise
    if len(data) < data_length:
        return None
    return docket, data


def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inline revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog
-    callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
+
+    # this needs to run after the changelog finalization, whose callback id
+    # starts with "cl-"; hence the "nm-" prefix, which sorts after it
+    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: _persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
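
# Editor's sketch (hypothetical ids, not part of nodemap.py): the comment in
# the hunk above implies that transaction callbacks run in sorted category
# order; under that assumption, the b"nm-" prefix is what puts the nodemap
# callback after the changelog's b"cl-" prefixed one.
_sketch_categories = [
    b"nm-revlog-persistent-nodemap-00changelog.n",
    b"cl-140234567",  # made-up stand-in for the changelog callback id
]
assert sorted(_sketch_categories) == [
    b"cl-140234567",
    b"nm-revlog-persistent-nodemap-00changelog.n",
]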


class _NoTransaction(object):
    """transaction-like object to update the nodemap outside a transaction
    """

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func


def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (e.g., `debugupdatecache`).
    """
    notr = _NoTransaction()
    _persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)


def _persist_nodemap(tr, revlog, pending=False):
    """Write nodemap data on disk for a given revlog
    """
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get("exp-persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        if src_docket != target_docket:
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            new_length = target_docket.data_length + len(data)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
                    else:
                        fd.seek(0)
                        new_data = fd.read(new_length)
            target_docket.data_length = new_length
            target_docket.data_unused += data_changed_count

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
                else:
                    new_data = data
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog.nodemap_file
    if pending:
        file_path += b'.a'
    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # EXP-TODO: if the transaction aborts, we should remove the new data and
    # reinstall the old one.

    # search for old data files in all cases, some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        tr.addpostclose(callback_id, cleanup)


### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being used by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.

# version 0 is experimental, with no backward-compatibility guarantee; do not
# use it outside of tests.
ONDISK_VERSION = 0
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")

ID_SIZE = 8


def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    return nodemod.hex(os.urandom(ID_SIZE))


class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # a unique identifier for the data file:
        # - When new data are appended, it is preserved.
        # - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision; if it mismatches the current
        # index data, the docket is not valid for the current index and
        # should be discarded.
        #
        # note: this check is not perfect, as some destructive operations
        # could preserve the same tip_rev + tip_node while altering lower
        # revisions. However, multiple other caches have the same
        # vulnerability (eg: branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap
        # valid for `tip_rev`.
        # - data files shorter than this are corrupted,
        # - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for this docket"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)
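
# Editor's sketch (hypothetical values, not part of nodemap.py): how a
# serialized docket decomposes, following S_VERSION and S_HEADER above.
_sketch_docket = NodeMapDocket(uid=b'0011223344556677')  # 16 made-up chars
_sketch_docket.tip_rev = 42
_sketch_docket.tip_node = b'\xde\xad\xbe\xef' * 5  # made-up 20-byte node
_sketch_docket.data_length = 4096
_sketch_raw = _sketch_docket.serialize()
# layout: 1 version byte, the ">BQQQQ" header, then the uid and tip node
assert _sketch_raw[0:1] == S_VERSION.pack(ONDISK_VERSION)
assert S_HEADER.unpack(_sketch_raw[1 : 1 + S_HEADER.size]) == (
    16,  # uid length
    42,  # tip_rev
    4096,  # data_length
    0,  # data_unused
    20,  # tip_node length
)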


def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    if revlog.nodemap_file.endswith(b'.n.a'):
        prefix = revlog.nodemap_file[:-4]
    else:
        prefix = revlog.nodemap_file[:-2]
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.nodemap_file[:-2]
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of
# this is not expected to be actually used, since it won't provide any
# performance improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a trie using 4-bit addresses and 16-entry
# blocks. Each revision can be addressed using the shortest prefix of its
# node.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32bit integer, big endian). Each entry can be one of the following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
# * value <  -1 -> a revision value: rev = -(value+2)
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# used extensively in production.
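
# Editor's worked example of the entry encoding above (hypothetical helper,
# not part of nodemap.py):
def _sketch_decode_entry(value):
    """decode one block entry according to the three rules above"""
    if value >= 0:
        return ('sub-block', value)
    if value == -1:
        return ('no entry', None)
    return ('revision', -(value + 2))


assert _sketch_decode_entry(5) == ('sub-block', 5)
assert _sketch_decode_entry(-1) == ('no entry', None)
assert _sketch_decode_entry(-7) == ('revision', 5)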


def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index
    """
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index
    """
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    applying it twice gives back the original value).
    """
    return -(rev + REV_OFFSET)


def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision numbers for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        hex = nodemod.hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, hex)
    return root


def _update_trie(index, root, last_rev):
    """update a trie with the revisions added since `last_rev`"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        hex = nodemod.hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, hex)
    return changed, root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = nodemod.hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed
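
# Editor's sketch (hypothetical data, not part of nodemap.py): a minimal
# stand-in for a revlog index; _build_trie only reads entry[7], the binary
# node of each revision.
_sketch_nodes = [
    b'\x12\x34' + b'\x00' * 18,  # rev 0, hex digits start with "1234"
    b'\x12\xf0' + b'\x00' * 18,  # rev 1, hex digits start with "12f0"
]
_sketch_index = [(0, 0, 0, 0, 0, 0, 0, n) for n in _sketch_nodes]
_sketch_root = _build_trie(_sketch_index)
# the shared prefix "12" forces two levels of sub-blocks before the third
# hex digit ("3" vs "f") tells the two revisions apart:
assert _sketch_root[1][2][3] == 0
assert _sketch_root[1][2][0xF] == 1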


def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block.
    """
    for (_, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)


def parse_data(data):
    """parse nodemap data into a nodemap trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = "nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    # children are written before parents, so the last block is the root
    return block, i // S_BLOCK.size
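
# Editor's sketch continued (hypothetical, not part of nodemap.py): the
# binary form of the stand-in trie from the _build_trie example round-trips.
_sketch_data = persistent_data(_sketch_index)
assert len(_sketch_data) == 3 * S_BLOCK.size  # two sub-blocks plus the root
_sketch_root2, _sketch_last_idx = parse_data(_sketch_data)
assert _sketch_last_idx == 2  # the root is the last of the three blocks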


# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b" revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            nm_rev = _find_node(root, nodemod.hex(index[r][7]))
            if nm_rev is None:
                msg = b" revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b" revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b" extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret


def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v


def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry
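
# Editor's sketch continued (hypothetical, not part of nodemap.py): any
# unique hex prefix resolves, since lookup stops at the first non-block
# entry; the full 40-digit node works just as well.
assert _find_node(_sketch_root2, b"12f") == 1
assert _find_node(_sketch_root2, nodemod.hex(_sketch_index[0][7])) == 0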