upstream/mercurial-mirror Commit - r44798:78721bbd

1

# nodemap.py - nodemap related code and utilities

1

# nodemap.py - nodemap related code and utilities

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

from __future__ import absolute_import

9

from __future__ import absolute_import

10

11

import os

11

import os

12

import re

12

import re

13

import struct

13

import struct

14

15

from .. import (

15

from .. import (

16

error,

16

error,

17

node as nodemod,

17

node as nodemod,

18

util,

18

util,

19

)

19

)

20

21

22

class NodeMap(dict):

22

class NodeMap(dict):

23

def __missing__(self, x):

23

def __missing__(self, x):

24

raise error.RevlogError(b'unknown node: %s' % x)

24

raise error.RevlogError(b'unknown node: %s' % x)

25

26

27

def persisted_data(revlog):

27

def persisted_data(revlog):

28

"""read the nodemap for a revlog from disk"""

28

"""read the nodemap for a revlog from disk"""

29

if revlog.nodemap_file is None:

29

if revlog.nodemap_file is None:

30

return None

30

return None

31

pdata = revlog.opener.tryread(revlog.nodemap_file)

31

pdata = revlog.opener.tryread(revlog.nodemap_file)

32

if not pdata:

32

if not pdata:

33

return None

33

return None

34

offset = 0

34

offset = 0

35

(version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])

35

(version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])

36

if version != ONDISK_VERSION:

36

if version != ONDISK_VERSION:

37

return None

37

return None

38

offset += S_VERSION.size

38

offset += S_VERSION.size

39

(uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])

39

(uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])

40

offset += S_HEADER.size

40

offset += S_HEADER.size

41

uid = pdata[offset : offset + uuid_size]

41

uid = pdata[offset : offset + uuid_size]

42

43

filename = _rawdata_filepath(revlog, uid)

43

filename = _rawdata_filepath(revlog, uid)

44

return revlog.opener.tryread(filename)

44

return revlog.opener.tryread(filename)

45

46

47

def setup_persistent_nodemap(tr, revlog):

47

def setup_persistent_nodemap(tr, revlog):

48

"""Install whatever is needed transaction side to persist a nodemap on disk

48

"""Install whatever is needed transaction side to persist a nodemap on disk

49

50

(only actually persist the nodemap if this is relevant for this revlog)

50

(only actually persist the nodemap if this is relevant for this revlog)

51

"""

51

"""

52

if revlog._inline:

52

if revlog._inline:

53

return # inlined revlog are too small for this to be relevant

53

return # inlined revlog are too small for this to be relevant

54

if revlog.nodemap_file is None:

54

if revlog.nodemap_file is None:

55

return # we do not use persistent_nodemap on this revlog

55

return # we do not use persistent_nodemap on this revlog

56

callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file

56

callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file

57

if tr.hasfinalize(callback_id):

57

if tr.hasfinalize(callback_id):

58

return # no need to register again

58

return # no need to register again

59

tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))

59

tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))

60

61

62

def _persist_nodemap(tr, revlog):

62

def _persist_nodemap(tr, revlog):

63

"""Write nodemap data on disk for a given revlog

63

"""Write nodemap data on disk for a given revlog

64

"""

64

"""

65

if getattr(revlog, 'filteredrevs', ()):

65

if getattr(revlog, 'filteredrevs', ()):

66

raise error.ProgrammingError(

66

raise error.ProgrammingError(

67

"cannot persist nodemap of a filtered changelog"

67

"cannot persist nodemap of a filtered changelog"

68

)

68

)

69

if revlog.nodemap_file is None:

69

if revlog.nodemap_file is None:

70

msg = "calling persist nodemap on a revlog without the feature enableb"

70

msg = "calling persist nodemap on a revlog without the feature enableb"

71

raise error.ProgrammingError(msg)

71

raise error.ProgrammingError(msg)

72

if util.safehasattr(revlog.index, "nodemap_data_all"):

72

if util.safehasattr(revlog.index, "nodemap_data_all"):

73

data = revlog.index.nodemap_data_all()

73

data = revlog.index.nodemap_data_all()

74

else:

74

else:

75

data = persistent_data(revlog.index)

75

data = persistent_data(revlog.index)

76

uid = _make_uid()

76

uid = _make_uid()

77

datafile = _rawdata_filepath(revlog, uid)

77

datafile = _rawdata_filepath(revlog, uid)

78

olds = _other_rawdata_filepath(revlog, uid)

78

olds = _other_rawdata_filepath(revlog, uid)

79

if olds:

79

if olds:

80

realvfs = getattr(revlog, '_realopener', revlog.opener)

80

realvfs = getattr(revlog, '_realopener', revlog.opener)

81

82

def cleanup(tr):

82

def cleanup(tr):

83

for oldfile in olds:

83

for oldfile in olds:

84

realvfs.tryunlink(oldfile)

84

realvfs.tryunlink(oldfile)

85

86

callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file

86

callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file

87

tr.addpostclose(callback_id, cleanup)

87

tr.addpostclose(callback_id, cleanup)

88

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

88

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

89

# store vfs

89

# store vfs

90

with revlog.opener(datafile, b'w') as fd:

90

with revlog.opener(datafile, b'w') as fd:

91

fd.write(data)

91

fd.write(data)

92

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

92

# EXP-TODO: if this is a cache, this should use a cache vfs, not a

93

# store vfs

93

# store vfs

94

with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:

94

with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:

95

fp.write(_serialize_docket(uid))

95

fp.write(_serialize_docket(uid))

96

# EXP-TODO: if the transaction abort, we should remove the new data and

96

# EXP-TODO: if the transaction abort, we should remove the new data and

97

# reinstall the old one.

97

# reinstall the old one.

98

99

100

### Nodemap docket file

100

### Nodemap docket file

101

#

101

#

102

# The nodemap data are stored on disk using 2 files:

102

# The nodemap data are stored on disk using 2 files:

103

#

103

#

104

# * a raw data files containing a persistent nodemap

104

# * a raw data files containing a persistent nodemap

105

# (see `Nodemap Trie` section)

105

# (see `Nodemap Trie` section)

106

#

106

#

107

# * a small "docket" file containing medatadata

107

# * a small "docket" file containing medatadata

108

#

108

#

109

# While the nodemap data can be multiple tens of megabytes, the "docket" is

109

# While the nodemap data can be multiple tens of megabytes, the "docket" is

110

# small, it is easy to update it automatically or to duplicated its content

110

# small, it is easy to update it automatically or to duplicated its content

111

# during a transaction.

111

# during a transaction.

112

#

112

#

113

# Multiple raw data can exist at the same time (The currently valid one and a

113

# Multiple raw data can exist at the same time (The currently valid one and a

114

# new one beind used by an in progress transaction). To accomodate this, the

114

# new one beind used by an in progress transaction). To accomodate this, the

115

# filename hosting the raw data has a variable parts. The exact filename is

115

# filename hosting the raw data has a variable parts. The exact filename is

116

# specified inside the "docket" file.

116

# specified inside the "docket" file.

117

#

117

#

118

# The docket file contains information to find, qualify and validate the raw

118

# The docket file contains information to find, qualify and validate the raw

119

# data. Its content is currently very light, but it will expand as the on disk

119

# data. Its content is currently very light, but it will expand as the on disk

120

# nodemap gains the necessary features to be used in production.

120

# nodemap gains the necessary features to be used in production.

121

122

# version 0 is experimental, no BC garantee, do no use outside of tests.

122

# version 0 is experimental, no BC garantee, do no use outside of tests.

123

ONDISK_VERSION = 0

123

ONDISK_VERSION = 0

124

125

S_VERSION = struct.Struct(">B")

125

S_VERSION = struct.Struct(">B")

126

S_HEADER = struct.Struct(">B")

126

S_HEADER = struct.Struct(">B")

127

128

ID_SIZE = 8

128

ID_SIZE = 8

129

130

131

def _make_uid():

131

def _make_uid():

132

"""return a new unique identifier.

132

"""return a new unique identifier.

133

134

The identifier is random and composed of ascii characters."""

134

The identifier is random and composed of ascii characters."""

135

return nodemod.hex(os.urandom(ID_SIZE))

135

return nodemod.hex(os.urandom(ID_SIZE))

136

137

138

def _serialize_docket(uid):

138

def _serialize_docket(uid):

139

"""return serialized bytes for a docket using the passed uid"""

139

"""return serialized bytes for a docket using the passed uid"""

140

data = []

140

data = []

141

data.append(S_VERSION.pack(ONDISK_VERSION))

141

data.append(S_VERSION.pack(ONDISK_VERSION))

142

data.append(S_HEADER.pack(len(uid)))

142

data.append(S_HEADER.pack(len(uid)))

143

data.append(uid)

143

data.append(uid)

144

return b''.join(data)

144

return b''.join(data)

145

146

147

def _rawdata_filepath(revlog, uid):

147

def _rawdata_filepath(revlog, uid):

148

"""The (vfs relative) nodemap's rawdata file for a given uid"""

148

"""The (vfs relative) nodemap's rawdata file for a given uid"""

149

prefix = revlog.nodemap_file[:-2]

149

prefix = revlog.nodemap_file[:-2]

150

return b"%s-%s.nd" % (prefix, uid)

150

return b"%s-%s.nd" % (prefix, uid)

151

152

153

def _other_rawdata_filepath(revlog, uid):

153

def _other_rawdata_filepath(revlog, uid):

154

prefix = revlog.nodemap_file[:-2]

154

prefix = revlog.nodemap_file[:-2]

155

pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)

155

pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)

156

new_file_path = _rawdata_filepath(revlog, uid)

156

new_file_path = _rawdata_filepath(revlog, uid)

157

new_file_name = revlog.opener.basename(new_file_path)

157

new_file_name = revlog.opener.basename(new_file_path)

158

dirpath = revlog.opener.dirname(new_file_path)

158

dirpath = revlog.opener.dirname(new_file_path)

159

others = []

159

others = []

160

for f in revlog.opener.listdir(dirpath):

160

for f in revlog.opener.listdir(dirpath):

161

if pattern.match(f) and f != new_file_name:

161

if pattern.match(f) and f != new_file_name:

162

others.append(f)

162

others.append(f)

163

return others

163

return others

164

165

166

### Nodemap Trie

166

### Nodemap Trie

167

#

167

#

168

# This is a simple reference implementation to compute and persist a nodemap

168

# This is a simple reference implementation to compute and persist a nodemap

169

# trie. This reference implementation is write only. The python version of this

169

# trie. This reference implementation is write only. The python version of this

170

# is not expected to be actually used, since it wont provide performance

170

# is not expected to be actually used, since it wont provide performance

171

# improvement over existing non-persistent C implementation.

171

# improvement over existing non-persistent C implementation.

172

#

172

#

173

# The nodemap is persisted as Trie using 4bits-address/16-entries block. each

173

# The nodemap is persisted as Trie using 4bits-address/16-entries block. each

174

# revision can be adressed using its node shortest prefix.

174

# revision can be adressed using its node shortest prefix.

175

#

175

#

176

# The trie is stored as a sequence of block. Each block contains 16 entries

176

# The trie is stored as a sequence of block. Each block contains 16 entries

177

# (signed 64bit integer, big endian). Each entry can be one of the following:

177

# (signed 64bit integer, big endian). Each entry can be one of the following:

178

#

178

#

179

# * value >= 0 -> index of sub-block

179

# * value >= 0 -> index of sub-block

180

# * value == -1 -> no value

180

# * value == -1 -> no value

181

# * value < -1 -> a revision value: rev = -(value+10)

181

# * value < -1 -> a revision value: rev = -(value+10)

182

#

182

#

183

# The implementation focus on simplicity, not on performance. A Rust

183

# The implementation focus on simplicity, not on performance. A Rust

184

# implementation should provide a efficient version of the same binary

184

# implementation should provide a efficient version of the same binary

185

# persistence. This reference python implementation is never meant to be

185

# persistence. This reference python implementation is never meant to be

186

# extensively use in production.

186

# extensively use in production.

187

188

189

def persistent_data(index):

189

def persistent_data(index):

190

"""return the persistent binary form for a nodemap for a given index

190

"""return the persistent binary form for a nodemap for a given index

191

"""

191

"""

192

trie = _build_trie(index)

192

trie = _build_trie(index)

193

return _persist_trie(trie)

193

return _persist_trie(trie)

194

195

196

S_BLOCK = struct.Struct(">" + ("l" * 16))

196

S_BLOCK = struct.Struct(">" + ("l" * 16))

197

198

NO_ENTRY = -1

198

NO_ENTRY = -1

199

# rev 0 need to be -2 because 0 is used by block, -1 is a special value.

199

# rev 0 need to be -2 because 0 is used by block, -1 is a special value.

200

REV_OFFSET = 2

200

REV_OFFSET = 2

201

202

203

def _transform_rev(rev):

203

def _transform_rev(rev):

204

"""Return the number used to represent the rev in the tree.

204

"""Return the number used to represent the rev in the tree.

205

206

(or retrieve a rev number from such representation)

206

(or retrieve a rev number from such representation)

207

208

Note that this is an involution, a function equal to its inverse (i.e.

208

Note that this is an involution, a function equal to its inverse (i.e.

209

which gives the identity when applied to itself).

209

which gives the identity when applied to itself).

210

"""

210

"""

211

return -(rev + REV_OFFSET)

211

return -(rev + REV_OFFSET)

212

213

214

def _to_int(hex_digit):

214

def _to_int(hex_digit):

215

"""turn an hexadecimal digit into a proper integer"""

215

"""turn an hexadecimal digit into a proper integer"""

216

return int(hex_digit, 16)

216

return int(hex_digit, 16)

217

218

219

class Block(dict):

219

class Block(dict):

220

"""represent a block of the Trie

220

"""represent a block of the Trie

221

222

contains up to 16 entry indexed from 0 to 15"""

222

contains up to 16 entry indexed from 0 to 15"""

223

224

def __iter__(self):

224

def __iter__(self):

225

return iter(self.get(i) for i in range(16))

225

return iter(self.get(i) for i in range(16))

226

227

228

def _build_trie(index):

228

def _build_trie(index):

229

"""build a nodemap trie

229

"""build a nodemap trie

230

231

The nodemap stores revision number for each unique prefix.

231

The nodemap stores revision number for each unique prefix.

232

233

Each block is a dictionary with keys in `[0, 15]`. Values are either

233

Each block is a dictionary with keys in `[0, 15]`. Values are either

234

another block or a revision number.

234

another block or a revision number.

235

"""

235

"""

236

root = Block()

236

root = Block()

237

for rev in range(len(index)):

237

for rev in range(len(index)):

238

hex = nodemod.hex(index[rev][7])

238

hex = nodemod.hex(index[rev][7])

239

_insert_into_block(index, 0, root, rev, hex)

239

_insert_into_block(index, 0, root, rev, hex)

240

return root

240

return root

241

242

243

def _insert_into_block(index, level, block, current_rev, current_hex):

243

def _insert_into_block(index, level, block, current_rev, current_hex):

244

"""insert a new revision in a block

244

"""insert a new revision in a block

245

246

index: the index we are adding revision for

246

index: the index we are adding revision for

247

level: the depth of the current block in the trie

247

level: the depth of the current block in the trie

248

block: the block currently being considered

248

block: the block currently being considered

249

current_rev: the revision number we are adding

249

current_rev: the revision number we are adding

250

current_hex: the hexadecimal representation of the of that revision

250

current_hex: the hexadecimal representation of the of that revision

251

"""

251

"""

252

hex_digit = _to_int(current_hex[level : level + 1])

252

hex_digit = _to_int(current_hex[level : level + 1])

253

entry = block.get(hex_digit)

253

entry = block.get(hex_digit)

254

if entry is None:

254

if entry is None:

255

# no entry, simply store the revision number

255

# no entry, simply store the revision number

256

block[hex_digit] = current_rev

256

block[hex_digit] = current_rev

257

elif isinstance(entry, dict):

257

elif isinstance(entry, dict):

258

# need to recurse to an underlying block

258

# need to recurse to an underlying block

259

_insert_into_block(index, level + 1, entry, current_rev, current_hex)

259

_insert_into_block(index, level + 1, entry, current_rev, current_hex)

260

else:

260

else:

261

# collision with a previously unique prefix, inserting new

261

# collision with a previously unique prefix, inserting new

262

# vertices to fit both entry.

262

# vertices to fit both entry.

263

other_hex = nodemod.hex(index[entry][7])

263

other_hex = nodemod.hex(index[entry][7])

264

other_rev = entry

264

other_rev = entry

265

new = Block()

265

new = Block()

266

block[hex_digit] = new

266

block[hex_digit] = new

267

_insert_into_block(index, level + 1, new, other_rev, other_hex)

267

_insert_into_block(index, level + 1, new, other_rev, other_hex)

268

_insert_into_block(index, level + 1, new, current_rev, current_hex)

268

_insert_into_block(index, level + 1, new, current_rev, current_hex)

269

270

271

def _persist_trie(root):

271

def _persist_trie(root):

272

"""turn a nodemap trie into persistent binary data

272

"""turn a nodemap trie into persistent binary data

273

274

See `_build_trie` for nodemap trie structure"""

274

See `_build_trie` for nodemap trie structure"""

275

block_map = {}

275

block_map = {}

276

chunks = []

276

chunks = []

277

for tn in _walk_trie(root):

277

for tn in _walk_trie(root):

278

block_map[id(tn)] = len(chunks)

278

block_map[id(tn)] = len(chunks)

279

chunks.append(_persist_block(tn, block_map))

279

chunks.append(_persist_block(tn, block_map))

280

return b''.join(chunks)

280

return b''.join(chunks)

281

282

283

def _walk_trie(block):

283

def _walk_trie(block):

284

"""yield all the block in a trie

284

"""yield all the block in a trie

285

286

Children blocks are always yield before their parent block.

286

Children blocks are always yield before their parent block.

287

"""

287

"""

288

for (_, item) in sorted(block.items()):

288

for (_, item) in sorted(block.items()):

289

if isinstance(item, dict):

289

if isinstance(item, dict):

290

for sub_block in _walk_trie(item):

290

for sub_block in _walk_trie(item):

291

yield sub_block

291

yield sub_block

292

yield block

292

yield block

293

294

295

def _persist_block(block_node, block_map):

295

def _persist_block(block_node, block_map):

296

"""produce persistent binary data for a single block

296

"""produce persistent binary data for a single block

297

298

Children block are assumed to be already persisted and present in

298

Children block are assumed to be already persisted and present in

299

block_map.

299

block_map.

300

"""

300

"""

301

data = tuple(_to_value(v, block_map) for v in block_node)

301

data = tuple(_to_value(v, block_map) for v in block_node)

302

return S_BLOCK.pack(*data)

302

return S_BLOCK.pack(*data)

303

304

305

def _to_value(item, block_map):

305

def _to_value(item, block_map):

306

"""persist any value as an integer"""

306

"""persist any value as an integer"""

307

if item is None:

307

if item is None:

308

return NO_ENTRY

308

return NO_ENTRY

309

elif isinstance(item, dict):

309

elif isinstance(item, dict):

310

return block_map[id(item)]

310

return block_map[id(item)]

311

else:

311

else:

312

return _transform_rev(item)

312

return _transform_rev(item)

313

314

315

def parse_data(data):

316

"""parse parse nodemap data into a nodemap Trie"""

317

if (len(data) % S_BLOCK.size) != 0:

318

msg = "nodemap data size is not a multiple of block size (%d): %d"

319

raise error.Abort(msg % (S_BLOCK.size, len(data)))

320

if not data:

321

return Block()

322

block_map = {}

323

new_blocks = []

324

for i in range(0, len(data), S_BLOCK.size):

325

block = Block()

326

ondisk_id = len(block_map)

327

block_map[ondisk_id] = block

328

block_data = data[i : i + S_BLOCK.size]

329

values = S_BLOCK.unpack(block_data)

330

new_blocks.append((block, values))

331

for b, values in new_blocks:

332

for idx, v in enumerate(values):

333

if v == NO_ENTRY:

334

continue

335

elif v >= 0:

336

b[idx] = block_map[v]

337

else:

338

b[idx] = _transform_rev(v)

339

return block

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # nodemap.py - nodemap related code and utilities
             #
             # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
             # Copyright 2019 George Racinet <georges.racinet@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import os
             import re
             import struct
             from .. import (
                 error,
                 node as nodemod,
                 util,
             )
             class NodeMap(dict):
                 def __missing__(self, x):
                     raise error.RevlogError(b'unknown node: %s' % x)
             def persisted_data(revlog):
                 """read the nodemap for a revlog from disk"""
                 if revlog.nodemap_file is None:
                     return None
                 pdata = revlog.opener.tryread(revlog.nodemap_file)
                 if not pdata:
                     return None
                 offset = 0
                 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
                 if version != ONDISK_VERSION:
                     return None
                 offset += S_VERSION.size
                 (uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
                 offset += S_HEADER.size
                 uid = pdata[offset : offset + uuid_size]
                 filename = _rawdata_filepath(revlog, uid)
                 return revlog.opener.tryread(filename)
             def setup_persistent_nodemap(tr, revlog):
                 """Install whatever is needed transaction side to persist a nodemap on disk
                 (only actually persist the nodemap if this is relevant for this revlog)
                 """
                 if revlog._inline:
                     return  # inlined revlog are too small for this to be relevant
                 if revlog.nodemap_file is None:
                     return  # we do not use persistent_nodemap on this revlog
                 callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
                 if tr.hasfinalize(callback_id):
                     return  # no need to register again
                 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
             def _persist_nodemap(tr, revlog):
                 """Write nodemap data on disk for a given revlog
                 """
                 if getattr(revlog, 'filteredrevs', ()):
                     raise error.ProgrammingError(
                         "cannot persist nodemap of a filtered changelog"
                     )
                 if revlog.nodemap_file is None:
                     msg = "calling persist nodemap on a revlog without the feature enableb"
                     raise error.ProgrammingError(msg)
                 if util.safehasattr(revlog.index, "nodemap_data_all"):
                     data = revlog.index.nodemap_data_all()
                 else:
                     data = persistent_data(revlog.index)
                 uid = _make_uid()
                 datafile = _rawdata_filepath(revlog, uid)
                 olds = _other_rawdata_filepath(revlog, uid)
                 if olds:
                     realvfs = getattr(revlog, '_realopener', revlog.opener)
                     def cleanup(tr):
                         for oldfile in olds:
                             realvfs.tryunlink(oldfile)
                     callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
                     tr.addpostclose(callback_id, cleanup)
                 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                 # store vfs
                 with revlog.opener(datafile, b'w') as fd:
                     fd.write(data)
                 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                 # store vfs
                 with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
                     fp.write(_serialize_docket(uid))
                 # EXP-TODO: if the transaction abort, we should remove the new data and
                 # reinstall the old one.
             ### Nodemap docket file
             #
             # The nodemap data are stored on disk using 2 files:
             #
             # * a raw data files containing a persistent nodemap
             #   (see `Nodemap Trie` section)
             #
             # * a small "docket" file containing medatadata
             #
             # While the nodemap data can be multiple tens of megabytes, the "docket" is
             # small, it is easy to update it automatically or to duplicated its content
             # during a transaction.
             #
             # Multiple raw data can exist at the same time (The currently valid one and a
             # new one beind used by an in progress transaction). To accomodate this, the
             # filename hosting the raw data has a variable parts. The exact filename is
             # specified inside the "docket" file.
             #
             # The docket file contains information to find, qualify and validate the raw
             # data. Its content is currently very light, but it will expand as the on disk
             # nodemap gains the necessary features to be used in production.
             # version 0 is experimental, no BC garantee, do no use outside of tests.
             ONDISK_VERSION = 0
             S_VERSION = struct.Struct(">B")
             S_HEADER = struct.Struct(">B")
             ID_SIZE = 8
             def _make_uid():
                 """return a new unique identifier.
                 The identifier is random and composed of ascii characters."""
                 return nodemod.hex(os.urandom(ID_SIZE))
             def _serialize_docket(uid):
                 """return serialized bytes for a docket using the passed uid"""
                 data = []
                 data.append(S_VERSION.pack(ONDISK_VERSION))
                 data.append(S_HEADER.pack(len(uid)))
                 data.append(uid)
                 return b''.join(data)
             def _rawdata_filepath(revlog, uid):
                 """The (vfs relative) nodemap's rawdata file for a given uid"""
                 prefix = revlog.nodemap_file[:-2]
                 return b"%s-%s.nd" % (prefix, uid)
             def _other_rawdata_filepath(revlog, uid):
                 prefix = revlog.nodemap_file[:-2]
                 pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
                 new_file_path = _rawdata_filepath(revlog, uid)
                 new_file_name = revlog.opener.basename(new_file_path)
                 dirpath = revlog.opener.dirname(new_file_path)
                 others = []
                 for f in revlog.opener.listdir(dirpath):
                     if pattern.match(f) and f != new_file_name:
                         others.append(f)
                 return others
             ### Nodemap Trie
             #
             # This is a simple reference implementation to compute and persist a nodemap
             # trie. This reference implementation is write only. The python version of this
             # is not expected to be actually used, since it wont provide performance
             # improvement over existing non-persistent C implementation.
             #
             # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
             # revision can be adressed using its node shortest prefix.
             #
             # The trie is stored as a sequence of block. Each block contains 16 entries
             # (signed 64bit integer, big endian). Each entry can be one of the following:
             #
             #  * value >=  0 -> index of sub-block
             #  * value == -1 -> no value
             #  * value <  -1 -> a revision value: rev = -(value+10)
             #
             # The implementation focus on simplicity, not on performance. A Rust
             # implementation should provide a efficient version of the same binary
             # persistence. This reference python implementation is never meant to be
             # extensively use in production.
             def persistent_data(index):
                 """return the persistent binary form for a nodemap for a given index
                 """
                 trie = _build_trie(index)
                 return _persist_trie(trie)
             S_BLOCK = struct.Struct(">" + ("l" * 16))
             NO_ENTRY = -1
             # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
             REV_OFFSET = 2
             def _transform_rev(rev):
                 """Return the number used to represent the rev in the tree.
                 (or retrieve a rev number from such representation)
                 Note that this is an involution, a function equal to its inverse (i.e.
                 which gives the identity when applied to itself).
                 """
                 return -(rev + REV_OFFSET)
             def _to_int(hex_digit):
                 """turn an hexadecimal digit into a proper integer"""
                 return int(hex_digit, 16)
             class Block(dict):
                 """represent a block of the Trie
                 contains up to 16 entry indexed from 0 to 15"""
                 def __iter__(self):
                     return iter(self.get(i) for i in range(16))
             def _build_trie(index):
                 """build a nodemap trie
                 The nodemap stores revision number for each unique prefix.
                 Each block is a dictionary with keys in `[0, 15]`. Values are either
                 another block or a revision number.
                 """
                 root = Block()
                 for rev in range(len(index)):
                     hex = nodemod.hex(index[rev][7])
                     _insert_into_block(index, 0, root, rev, hex)
                 return root
             def _insert_into_block(index, level, block, current_rev, current_hex):
                 """insert a new revision in a block
                 index: the index we are adding revision for
                 level: the depth of the current block in the trie
                 block: the block currently being considered
                 current_rev: the revision number we are adding
                 current_hex: the hexadecimal representation of the of that revision
                 """
                 hex_digit = _to_int(current_hex[level : level + 1])
                 entry = block.get(hex_digit)
                 if entry is None:
                     # no entry, simply store the revision number
                     block[hex_digit] = current_rev
                 elif isinstance(entry, dict):
                     # need to recurse to an underlying block
                     _insert_into_block(index, level + 1, entry, current_rev, current_hex)
                 else:
                     # collision with a previously unique prefix, inserting new
                     # vertices to fit both entry.
                     other_hex = nodemod.hex(index[entry][7])
                     other_rev = entry
                     new = Block()
                     block[hex_digit] = new
                     _insert_into_block(index, level + 1, new, other_rev, other_hex)
                     _insert_into_block(index, level + 1, new, current_rev, current_hex)
             def _persist_trie(root):
                 """turn a nodemap trie into persistent binary data
                 See `_build_trie` for nodemap trie structure"""
                 block_map = {}
                 chunks = []
                 for tn in _walk_trie(root):
                     block_map[id(tn)] = len(chunks)
                     chunks.append(_persist_block(tn, block_map))
                 return b''.join(chunks)
             def _walk_trie(block):
                 """yield all the block in a trie
                 Children blocks are always yield before their parent block.
                 """
                 for (_, item) in sorted(block.items()):
                     if isinstance(item, dict):
                         for sub_block in _walk_trie(item):
                             yield sub_block
                 yield block
             def _persist_block(block_node, block_map):
                 """produce persistent binary data for a single block
                 Children block are assumed to be already persisted and present in
                 block_map.
                 """
                 data = tuple(_to_value(v, block_map) for v in block_node)
                 return S_BLOCK.pack(*data)
             def _to_value(item, block_map):
                 """persist any value as an integer"""
                 if item is None:
                     return NO_ENTRY
                 elif isinstance(item, dict):
                     return block_map[id(item)]
                 else:
                     return _transform_rev(item)
+            def parse_data(data):
+                """parse parse nodemap data into a nodemap Trie"""
+                if (len(data) % S_BLOCK.size) != 0:
+                    msg = "nodemap data size is not a multiple of block size (%d): %d"
+                    raise error.Abort(msg % (S_BLOCK.size, len(data)))
+                if not data:
+                    return Block()
+                block_map = {}
+                new_blocks = []
+                for i in range(0, len(data), S_BLOCK.size):
+                    block = Block()
+                    ondisk_id = len(block_map)
+                    block_map[ondisk_id] = block
+                    block_data = data[i : i + S_BLOCK.size]
+                    values = S_BLOCK.unpack(block_data)
+                    new_blocks.append((block, values))
+                for b, values in new_blocks:
+                    for idx, v in enumerate(values):
+                        if v == NO_ENTRY:
+                            continue
+                        elif v >= 0:
+                            b[idx] = block_map[v]
+                        else:
+                            b[idx] = _transform_rev(v)
+                return block