upstream/mercurial-mirror Commit - r44803:72c15641

1

# nodemap.py - nodemap related code and utilities

1

# nodemap.py - nodemap related code and utilities

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

from __future__ import absolute_import

9

from __future__ import absolute_import

10

11

import os

11

import os

12

import re

12

import re

13

import struct

13

import struct

14

15

from .. import (

15

from .. import (

16

error,

16

error,

17

node as nodemod,

17

node as nodemod,

18

util,

18

util,

19

)

19

)

20

21

22

class NodeMap(dict):
    """dictionary keyed by node that reports unknown nodes explicitly

    Looking up a key that is not present raises ``error.RevlogError``
    (through ``__missing__``) instead of the default ``KeyError``.
    """

    def __missing__(self, x):
        # invoked by ``dict.__getitem__`` when ``x`` is not a known key
        message = b'unknown node: %s' % x
        raise error.RevlogError(message)

25

26

27

def persisted_data(revlog):
    """read the nodemap for a revlog from disk

    The docket file (``revlog.nodemap_file``) is read first. It starts with
    a version field (``S_VERSION``), followed by the size of the uid
    (``S_HEADER``) and the uid itself. The uid is then used to locate the
    raw data file, whose content is returned.

    Returns ``None`` when the revlog has no nodemap file, when the docket is
    empty or truncated, or when it was written with another on-disk version.
    Otherwise returns whatever ``revlog.opener.tryread`` gives for the raw
    data file.
    """
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None

    offset = 0
    raw_version = pdata[offset : offset + S_VERSION.size]
    if len(raw_version) < S_VERSION.size:
        return None
    (version,) = S_VERSION.unpack(raw_version)
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size

    raw_header = pdata[offset : offset + S_HEADER.size]
    if len(raw_header) < S_HEADER.size:
        # truncated docket: handle it like a missing one instead of letting
        # struct.error escape to the caller
        return None
    (uid_size,) = S_HEADER.unpack(raw_header)
    offset += S_HEADER.size

    uid = pdata[offset : offset + uid_size]
    if len(uid) < uid_size:
        # the uid is cut short, it cannot name a valid raw data file
        return None
    docket = NodeMapDocket(uid)

    filename = _rawdata_filepath(revlog, docket)
    return revlog.opener.tryread(filename)

45

46

47

def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)

    Nothing is registered for an inline revlog (too small for this to be
    relevant), for a revlog without a ``nodemap_file``, or when a finalizer
    is already registered on ``tr`` for that nodemap file.
    """
    relevant = not revlog._inline and revlog.nodemap_file is not None
    if not relevant:
        return

    callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if not tr.hasfinalize(callback_id):

        def persist(transaction):
            return _persist_nodemap(transaction, revlog)

        tr.addfinalize(callback_id, persist)

60

61

62

def _persist_nodemap(tr, revlog):
    """Write nodemap data on disk for a given revlog

    The raw data is written to a new file named after a freshly created
    docket, then the docket file (``revlog.nodemap_file``) is rewritten to
    point at it. Raw data files found for other uids are unlinked once the
    transaction ``tr`` is closed.

    Raises ``error.ProgrammingError`` when the revlog is filtered or when it
    has no ``nodemap_file``.
    """
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    # prefer the index's own serialization when it provides one
    if util.safehasattr(revlog.index, "nodemap_data_all"):
        data = revlog.index.nodemap_data_all()
    else:
        data = persistent_data(revlog.index)

    target_docket = NodeMapDocket()
    datafile = _rawdata_filepath(revlog, target_docket)
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        tr.addpostclose(callback_id, cleanup)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    with revlog.opener(datafile, b'w') as fd:
        fd.write(data)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    # EXP-TODO: if the transaction abort, we should remove the new data and
    # reinstall the old one.

98

99

100

### Nodemap docket file

100

### Nodemap docket file

101

#

101

#

102

# The nodemap data are stored on disk using 2 files:

102

# The nodemap data are stored on disk using 2 files:

103

#

103

#

104

# * a raw data file containing a persistent nodemap

104

# * a raw data file containing a persistent nodemap

105

# (see `Nodemap Trie` section)

105

# (see `Nodemap Trie` section)

106

#

106

#

107

# * a small "docket" file containing metadata

107

# * a small "docket" file containing metadata

108

#

108

#

109

# While the nodemap data can be multiple tens of megabytes, the "docket" is

109

# While the nodemap data can be multiple tens of megabytes, the "docket" is

110

# small, it is easy to update it automatically or to duplicate its content

110

# small, it is easy to update it automatically or to duplicate its content

111

# during a transaction.

111

# during a transaction.

112

#

112

#

113

# Multiple raw data can exist at the same time (The currently valid one and a

113

# Multiple raw data can exist at the same time (The currently valid one and a

114

# new one being used by an in progress transaction). To accommodate this, the

114

# new one being used by an in progress transaction). To accommodate this, the

115

# filename hosting the raw data has a variable part. The exact filename is

115

# filename hosting the raw data has a variable part. The exact filename is

116

# specified inside the "docket" file.

116

# specified inside the "docket" file.

117

#

117

#

118

# The docket file contains information to find, qualify and validate the raw

118

# The docket file contains information to find, qualify and validate the raw

119

# data. Its content is currently very light, but it will expand as the on disk

119

# data. Its content is currently very light, but it will expand as the on disk

120

# nodemap gains the necessary features to be used in production.

120

# nodemap gains the necessary features to be used in production.

121

122

# On-disk version of the docket format.
# Version 0 is experimental, no BC guarantee, do not use outside of tests.
ONDISK_VERSION = 0

# Docket layout: one unsigned byte holding the version, then one unsigned
# byte holding the size of the uid that follows.
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">B")

# number of random bytes drawn to build a raw data file identifier
ID_SIZE = 8

129

130

131

def _make_uid():
    """return a new unique identifier.

    ``ID_SIZE`` random bytes are drawn from ``os.urandom`` and returned in
    their hexadecimal form (as produced by ``nodemod.hex``)."""
    random_bytes = os.urandom(ID_SIZE)
    return nodemod.hex(random_bytes)

136

137

138

class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        # without an explicit uid, a brand new one is generated
        self.uid = _make_uid() if uid is None else uid

    def copy(self):
        """return a new docket carrying the same uid"""
        return NodeMapDocket(uid=self.uid)

    def serialize(self):
        """return serialized bytes for this docket

        The result is the on-disk version, the size of the uid, then the
        uid itself."""
        pieces = [
            S_VERSION.pack(ONDISK_VERSION),
            S_HEADER.pack(len(self.uid)),
            self.uid,
        ]
        return b''.join(pieces)

145

159

146

160

147

def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given docket

    The name is built from ``revlog.nodemap_file`` minus its last two
    characters, a dash, the docket uid and the ``.nd`` extension.
    """
    stem = revlog.nodemap_file[:-2]
    return b"%s-%s.nd" % (stem, docket.uid)

151

165

152

166

153

def _other_rawdata_filepath(revlog, docket):
    """list the nodemap raw data files that do not belong to ``docket``

    The directory holding the raw data file of ``docket`` is listed through
    ``revlog.opener``. Every entry named like a raw data file of this revlog
    is returned, except the file of ``docket`` itself. Entries are returned
    as given by ``listdir``.
    """
    prefix = revlog.nodemap_file[:-2]
    # A raw literal is needed: "\." is not a valid escape sequence in a plain
    # bytes literal. The prefix is escaped so it is only matched literally.
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % re.escape(prefix))
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    return [
        f
        for f in revlog.opener.listdir(dirpath)
        if pattern.match(f) and f != new_file_name
    ]

164

178

165

179

166

### Nodemap Trie

180

### Nodemap Trie

167

#

181

#

168

# This is a simple reference implementation to compute and persist a nodemap

182

# This is a simple reference implementation to compute and persist a nodemap

169

# trie. This reference implementation is write only. The python version of this

183

# trie. This reference implementation is write only. The python version of this

170

# is not expected to be actually used, since it won't provide performance

184

# is not expected to be actually used, since it won't provide performance

171

# improvement over existing non-persistent C implementation.

185

# improvement over existing non-persistent C implementation.

172

#

186

#

173

# The nodemap is persisted as Trie using 4bits-address/16-entries block. each

187

# The nodemap is persisted as Trie using 4bits-address/16-entries block. each

174

# revision can be addressed using its node shortest prefix.

188

# revision can be addressed using its node shortest prefix.

175

#

189

#

176

# The trie is stored as a sequence of blocks. Each block contains 16 entries

190

# The trie is stored as a sequence of blocks. Each block contains 16 entries

177

# (signed 32bit integer, big endian). Each entry can be one of the following:

191

# (signed 32bit integer, big endian). Each entry can be one of the following:

178

#

192

#

179

# * value >= 0 -> index of sub-block

193

# * value >= 0 -> index of sub-block

180

# * value == -1 -> no value

194

# * value == -1 -> no value

181

# * value < -1 -> a revision value: rev = -(value+2)

195

# * value < -1 -> a revision value: rev = -(value+2)

182

#

196

#

183

# The implementation focuses on simplicity, not on performance. A Rust

197

# The implementation focuses on simplicity, not on performance. A Rust

184

# implementation should provide an efficient version of the same binary

198

# implementation should provide an efficient version of the same binary

185

# persistence. This reference python implementation is never meant to be

199

# persistence. This reference python implementation is never meant to be

186

# extensively used in production.

200

# extensively used in production.

187

201

188

202

189

def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index

    A trie is first built from the index, then turned into bytes.
    """
    return _persist_trie(_build_trie(index))

194

208

195

209

196

# one trie block: 16 big endian signed integers
S_BLOCK = struct.Struct(">" + ("l" * 16))

# value stored in a block slot that holds nothing
NO_ENTRY = -1

# rev 0 need to be -2 because 0 is used by block, -1 is a special value.
REV_OFFSET = 2

201

215

202

216

203

def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    The same formula works in both directions: this is an involution, a
    function equal to its own inverse.
    """
    return -rev - REV_OFFSET

212

226

213

227

214

def _to_int(hex_digit):
    """turn an hexadecimal digit into a proper integer"""
    value = int(hex_digit, 16)
    return value

217

231

218

232

219

class Block(dict):
    """represent a block of the Trie

    contains up to 16 entry indexed from 0 to 15

    Iterating over a block yields the content of its 16 slots in order,
    ``None`` standing for an unused slot."""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exist on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        for slot in range(16):
            yield self.get(slot)

231

245

232

246

233

def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.

    Revisions are inserted one after the other, in increasing order, using
    the hexadecimal form of item 7 of their index entry.
    """
    root = Block()
    for rev in range(len(index)):
        node_hex = nodemod.hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, node_hex)
    return root

246

260

247

261

248

def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    slot = _to_int(current_hex[level : level + 1])
    occupant = block.get(slot)
    deeper = level + 1

    if occupant is None:
        # no entry, simply store the revision number
        block[slot] = current_rev
        return

    if isinstance(occupant, dict):
        # need to recurse to an underlying block
        _insert_into_block(index, deeper, occupant, current_rev, current_hex)
        return

    # collision with a previously unique prefix, inserting new
    # vertices to fit both entry.
    occupant_hex = nodemod.hex(index[occupant][7])
    split = Block()
    block[slot] = split
    _insert_into_block(index, deeper, split, occupant, occupant_hex)
    _insert_into_block(index, deeper, split, current_rev, current_hex)

274

288

275

289

276

def _persist_trie(root):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure

    Blocks are serialized in the order given by `_walk_trie`; the position of
    a block in that sequence is the number used to reference it."""
    positions = {}
    chunks = []
    for position, node in enumerate(_walk_trie(root)):
        # record the position first, so `positions` is complete up to and
        # including the current block when it gets serialized
        positions[id(node)] = position
        chunks.append(_persist_block(node, positions))
    return b''.join(chunks)

286

300

287

301

288

def _walk_trie(block):
    """yield all the block in a trie

    Children blocks are always yield before their parent block. Children are
    visited by increasing key.
    """
    children = [
        item for (_key, item) in sorted(block.items()) if isinstance(item, dict)
    ]
    for child in children:
        for descendant in _walk_trie(child):
            yield descendant
    yield block

298

312

299

313

300

def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children block are assumed to be already persisted and present in
    block_map.
    """
    values = [_to_value(entry, block_map) for entry in block_node]
    return S_BLOCK.pack(*values)

308

322

309

323

310

def _to_value(item, block_map):
    """persist any value as an integer

    A block is turned into the number recorded for it in ``block_map``, an
    empty slot (``None``) into ``NO_ENTRY`` and anything else is handled as a
    revision number."""
    if isinstance(item, dict):
        return block_map[id(item)]
    if item is not None:
        return _transform_rev(item)
    return NO_ENTRY

318

332

319

333

320

def parse_data(data):
    """parse nodemap data into a nodemap Trie

    ``data`` is a sequence of serialized blocks, each ``S_BLOCK.size`` bytes
    long. The block stored last is returned as the root, since blocks are
    written children first. Empty data gives an empty ``Block``.

    Raises ``error.Abort`` when the size of ``data`` is not a multiple of the
    block size.
    """
    if (len(data) % S_BLOCK.size) != 0:
        # bytes, like the other messages of this module
        msg = b"nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block()

    # first pass: create every block, so that references can be resolved
    # whatever the order they appear in
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))

    # second pass: fill the blocks with sub-blocks and revisions
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)

    # the root is the last block stored
    root = new_blocks[-1][0]
    return root

345

359

346

360

347

# debug utility

361

# debug utility

348

362

349

363

350

def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index

    Counts are reported through ``ui.status`` and every inconsistency through
    ``ui.write_err``. Returns 1 if any inconsistency was found, 0 otherwise.
    """
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root = parse_data(data)
    unseen = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(unseen))

    for rev in range(len(index)):
        # is the revision stored somewhere in the trie?
        if rev in unseen:
            unseen.remove(rev)
        else:
            ui.write_err(b" revision missing from nodemap: %d\n" % rev)
            ret = 1

        # does a lookup by node give the revision back?
        found = _find_node(root, nodemod.hex(index[rev][7]))
        if found is None:
            ui.write_err(
                b" revision node does not match any entries: %d\n" % rev
            )
            ret = 1
        elif found != rev:
            ui.write_err(
                b" revision node does not match the expected revision: "
                b"%d != %d\n" % (rev, found)
            )
            ret = 1

    # whatever is left is in the nodemap but not in the index
    for rev in sorted(unseen):
        ui.write_err(b" extra revision in nodemap: %d\n" % rev)
        ret = 1
    return ret

383

397

384

398

385

def _all_revisions(root):
    """return all revisions stored in a Trie

    This is a generator: every slot content that is neither empty nor a
    ``Block`` is yielded."""
    for block in _walk_trie(root):
        for value in block:
            if value is not None and not isinstance(value, Block):
                yield value

392

406

393

407

394

def _find_node(block, node):
    """find the revision associated with a given node

    ``node`` is consumed one hexadecimal digit per level while going down the
    trie. The first entry that is not a block is returned (``None`` when the
    slot is empty)."""
    current = block
    remaining = node
    while True:
        entry = current.get(_to_int(remaining[0:1]))
        if not isinstance(entry, dict):
            return entry
        current = entry
        remaining = remaining[1:]

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # nodemap.py - nodemap related code and utilities
             #
             # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
             # Copyright 2019 George Racinet <georges.racinet@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import os
             import re
             import struct
             from .. import (
                 error,
                 node as nodemod,
                 util,
             )
             class NodeMap(dict):
                 """dict subclass whose failed lookups raise RevlogError

                 A missing key goes through `__missing__`, so `nodemap[x]` reports an
                 unknown node instead of raising a bare KeyError.
                 """

                 def __missing__(self, x):
                     # called by dict.__getitem__ when `x` is not a key
                     message = b'unknown node: %s' % x
                     raise error.RevlogError(message)
             def persisted_data(revlog):
                 """read the nodemap for a revlog from disk

                 Return None when the revlog has no `nodemap_file`, when the docket read
                 from that file is empty, or when its version is not `ONDISK_VERSION`.
                 Otherwise return whatever `opener.tryread` gives for the raw data file
                 named by the docket.
                 """
                 if revlog.nodemap_file is None:
                     return None
                 pdata = revlog.opener.tryread(revlog.nodemap_file)
                 if not pdata:
                     return None
                 offset = 0
                 # the docket starts with the format version; any other version than
                 # the one we know about is ignored
                 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
                 if version != ONDISK_VERSION:
                     return None
                 offset += S_VERSION.size
                 # next comes the size of the uid, followed by the uid itself
-                (uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
+                (uid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
                 offset += S_HEADER.size
-                uid = pdata[offset : offset + uuid_size]
+                docket = NodeMapDocket(pdata[offset : offset + uid_size])
                 # the uid selects which raw data file holds the nodemap
-                filename = _rawdata_filepath(revlog, uid)
+                filename = _rawdata_filepath(revlog, docket)
                 return revlog.opener.tryread(filename)
             def setup_persistent_nodemap(tr, revlog):
                 """Register the finalize callback that persists the nodemap on disk

                 Nothing is registered for an inlined revlog, for a revlog without a
                 `nodemap_file`, or when the callback is already installed on the
                 transaction.
                 """
                 # inlined revlog are too small for this to be relevant, and a revlog
                 # without a nodemap file does not use the persistent nodemap at all
                 if revlog._inline or revlog.nodemap_file is None:
                     return

                 hook_name = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
                 if not tr.hasfinalize(hook_name):

                     def persist(transaction):
                         return _persist_nodemap(transaction, revlog)

                     tr.addfinalize(hook_name, persist)
             def _persist_nodemap(tr, revlog):
                 """Write nodemap data on disk for a given revlog

                 Raise ProgrammingError if the revlog exposes a non-empty `filteredrevs`
                 or has no `nodemap_file`.
                 """
                 if getattr(revlog, 'filteredrevs', ()):
                     raise error.ProgrammingError(
                         "cannot persist nodemap of a filtered changelog"
                     )
                 if revlog.nodemap_file is None:
                     # NOTE(review): "enableb" in the message below looks like a typo
                     # for "enabled"
                     msg = "calling persist nodemap on a revlog without the feature enableb"
                     raise error.ProgrammingError(msg)
                 # use the serializer provided by the index when there is one, and
                 # fall back to the python trie implementation otherwise
                 if util.safehasattr(revlog.index, "nodemap_data_all"):
                     data = revlog.index.nodemap_data_all()
                 else:
                     data = persistent_data(revlog.index)
-                uid = _make_uid()
+                target_docket = NodeMapDocket()
-                datafile = _rawdata_filepath(revlog, uid)
+                datafile = _rawdata_filepath(revlog, target_docket)
-                olds = _other_rawdata_filepath(revlog, uid)
+                olds = _other_rawdata_filepath(revlog, target_docket)
                 # raw data files belonging to other uids are unlinked by a callback
                 # registered through `tr.addpostclose`
                 if olds:
                     realvfs = getattr(revlog, '_realopener', revlog.opener)
                     def cleanup(tr):
                         for oldfile in olds:
                             realvfs.tryunlink(oldfile)
                     callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
                     tr.addpostclose(callback_id, cleanup)
                 # the raw data are written first, the docket pointing at them second
                 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                 # store vfs
                 with revlog.opener(datafile, b'w') as fd:
                     fd.write(data)
                 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
                 # store vfs
                 with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
-                    fp.write(_serialize_docket(uid))
+                    fp.write(target_docket.serialize())
                 # EXP-TODO: if the transaction abort, we should remove the new data and
                 # reinstall the old one.
             ### Nodemap docket file
             #
             # The nodemap data are stored on disk using 2 files:
             #
             # * a raw data file containing a persistent nodemap
             #   (see `Nodemap Trie` section)
             #
             # * a small "docket" file containing metadata
             #
             # While the nodemap data can be multiple tens of megabytes, the "docket" is
             # small: it is easy to update it automatically or to duplicate its content
             # during a transaction.
             #
             # Multiple raw data files can exist at the same time (the currently valid one
             # and a new one being used by an in-progress transaction). To accommodate
             # this, the filename hosting the raw data has a variable part. The exact
             # filename is specified inside the "docket" file.
             #
             # The docket file contains information to find, qualify and validate the raw
             # data. Its content is currently very light, but it will expand as the on disk
             # nodemap gains the necessary features to be used in production.
             # version 0 is experimental, no BC guarantee, do not use outside of tests.
             # version number stored at the start of the docket and checked when
             # reading it back
             ONDISK_VERSION = 0
             # the version is stored as a single unsigned byte
             S_VERSION = struct.Struct(">B")
             # the header is a single unsigned byte holding the length of the uid
             S_HEADER = struct.Struct(">B")
             # number of random bytes drawn by `_make_uid` (before hex encoding)
             ID_SIZE = 8
             def _make_uid():
                 """Generate a fresh unique identifier.

                 `ID_SIZE` random bytes are drawn and returned in hexadecimal form, so the
                 identifier is composed of ascii characters."""
                 entropy = os.urandom(ID_SIZE)
                 return nodemod.hex(entropy)
-            def _serialize_docket(uid):
+            class NodeMapDocket(object):
-                """return serialized bytes for a docket using the passed uid"""
+                """metadata associated with persistent nodemap data
-                data = []
-                data.append(S_VERSION.pack(ONDISK_VERSION))
+                The persistent data may come from disk or be on their way to disk.
-                data.append(S_HEADER.pack(len(uid)))
+                """
-                data.append(uid)
-                return b''.join(data)
                 # a docket created without an explicit uid gets a fresh random one
+                def __init__(self, uid=None):
+                    if uid is None:
+                        uid = _make_uid()
+                    self.uid = uid
                 # the copy carries the same uid as the original docket
+                def copy(self):
+                    return NodeMapDocket(uid=self.uid)
                 # serialized layout: version, then length of the uid, then the uid
+                def serialize(self):
+                    """return serialized bytes for a docket using the passed uid"""
+                    data = []
+                    data.append(S_VERSION.pack(ONDISK_VERSION))
+                    data.append(S_HEADER.pack(len(self.uid)))
+                    data.append(self.uid)
+                    return b''.join(data)
-            def _rawdata_filepath(revlog, uid):
+            def _rawdata_filepath(revlog, docket):
                 """The (vfs relative) nodemap's rawdata file for a given uid"""
                 # the last two bytes of the docket filename are dropped, presumably its
                 # ".n" extension -- TODO confirm against the caller setting nodemap_file
                 prefix = revlog.nodemap_file[:-2]
                 # resulting name: <prefix>-<uid>.nd
-                return b"%s-%s.nd" % (prefix, uid)
+                return b"%s-%s.nd" % (prefix, docket.uid)
-            def _other_rawdata_filepath(revlog, uid):
+            def _other_rawdata_filepath(revlog, docket):
                 # Return the entries of the raw data directory that look like nodemap raw
                 # data for this revlog but are not the file of the given uid.
                 prefix = revlog.nodemap_file[:-2]
                 # NOTE(review): `prefix` is interpolated into the pattern without
                 # escaping, so regex metacharacters in it would be interpreted -- confirm
                 # this cannot happen for real nodemap file names
                 pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
-                new_file_path = _rawdata_filepath(revlog, uid)
+                new_file_path = _rawdata_filepath(revlog, docket)
                 new_file_name = revlog.opener.basename(new_file_path)
                 dirpath = revlog.opener.dirname(new_file_path)
                 others = []
                 # keep every matching entry except the file we are about to use
                 for f in revlog.opener.listdir(dirpath):
                     if pattern.match(f) and f != new_file_name:
                         others.append(f)
                 return others
             ### Nodemap Trie
             #
             # This is a simple reference implementation to compute and persist a nodemap
             # trie. This reference implementation is write only. The python version of this
             # is not expected to be actually used, since it won't provide a performance
             # improvement over the existing non-persistent C implementation.
             #
             # The nodemap is persisted as a Trie using 4bits-address/16-entries blocks.
             # Each revision can be addressed using the shortest prefix of its node.
             #
             # The trie is stored as a sequence of blocks. Each block contains 16 entries
             # (signed 32bit integer, big endian). Each entry can be one of the following:
             #
             #  * value >=  0 -> index of sub-block
             #  * value == -1 -> no value
             #  * value <  -1 -> a revision value: rev = -(value+2)
             #
             # The implementation focuses on simplicity, not on performance. A Rust
             # implementation should provide an efficient version of the same binary
             # persistence. This reference python implementation is never meant to be
             # extensively used in production.
             def persistent_data(index):
                 """Serialize the nodemap of `index` into its persistent binary form

                 The trie is built from the index first, then turned into bytes.
                 """
                 return _persist_trie(_build_trie(index))
             # one block: 16 big-endian signed integers ("l" is 4 bytes in standard size)
             S_BLOCK = struct.Struct(">" + ("l" * 16))
             # on-disk value of an empty slot
             NO_ENTRY = -1
             # rev 0 needs to be stored as -2: values >= 0 are used for block indexes and
             # -1 is the special "no entry" value.
             REV_OFFSET = 2
             def _transform_rev(rev):
                 """Map a revision number to its in-tree representation, and back.

                 The same formula works in both directions: the function is an involution
                 (applying it twice gives back the initial value).
                 """
                 shifted = rev + REV_OFFSET
                 return -shifted
             def _to_int(hex_digit):
                 """turn an hexadecimal digit into a proper integer"""
                 return int(hex_digit, 16)
             class Block(dict):
                 """One block of the Trie

                 Holds up to 16 entries, keyed by integers from 0 to 15. Iterating over a
                 block yields its 16 slots in order (None for an unset slot), not its
                 keys."""

                 def __init__(self):
                     super(Block, self).__init__()
                     # ID of this block on disk, if it exists there
                     self.ondisk_id = None

                 def __iter__(self):
                     slots = (self.get(position) for position in range(16))
                     return iter(slots)
             def _build_trie(index):
                 """Build the nodemap trie of an index

                 The trie stores a revision number for each unique node prefix. Every
                 block is a dictionary with keys in `[0, 15]` whose values are either a
                 revision number or another block.
                 """
                 trie = Block()
                 rev_count = len(index)
                 for rev in range(rev_count):
                     entry = index[rev]
                     # item 7 of an index entry is the node
                     _insert_into_block(index, 0, trie, rev, nodemod.hex(entry[7]))
                 return trie
             def _insert_into_block(index, level, block, current_rev, current_hex):
                 """Add a revision to a block, growing the trie when needed

                 index: the index we are adding revision for
                 level: the depth of the current block in the trie
                 block: the block currently being considered
                 current_rev: the revision number we are adding
                 current_hex: the hexadecimal representation of the node of that revision
                 """
                 slot = _to_int(current_hex[level : level + 1])
                 occupant = block.get(slot)
                 if occupant is None:
                     # free slot: the prefix is unique so far, store the revision
                     block[slot] = current_rev
                     return
                 deeper = level + 1
                 if isinstance(occupant, dict):
                     # the slot leads to a sub-block, keep descending
                     _insert_into_block(index, deeper, occupant, current_rev, current_hex)
                     return
                 # the slot holds a revision sharing our prefix: replace it with a new
                 # block and push both revisions one level down
                 occupant_hex = nodemod.hex(index[occupant][7])
                 split = Block()
                 block[slot] = split
                 pending = ((occupant, occupant_hex), (current_rev, current_hex))
                 for rev, hex_node in pending:
                     _insert_into_block(index, deeper, split, rev, hex_node)
             def _persist_trie(root):
                 """Serialize a nodemap trie into binary data

                 The structure of the trie is described in `_build_trie`."""
                 positions = {}
                 pieces = []
                 # blocks are numbered in walking order, which is also their order in
                 # the output
                 for position, node in enumerate(_walk_trie(root)):
                     positions[id(node)] = position
                     pieces.append(_persist_block(node, positions))
                 return b''.join(pieces)
             def _walk_trie(block):
                 """yield all the block in a trie
                 Children blocks are always yield before their parent block.
                 """
                 for (_, item) in sorted(block.items()):
                     if isinstance(item, dict):
                         for sub_block in _walk_trie(item):
                             yield sub_block
                 yield block
             def _persist_block(block_node, block_map):
                 """Serialize a single block into binary data

                 Every child block must have been persisted already, and be registered in
                 block_map.
                 """
                 values = [_to_value(slot, block_map) for slot in block_node]
                 return S_BLOCK.pack(*values)
             def _to_value(item, block_map):
                 """Return the integer persisted for a block entry"""
                 if item is None:
                     return NO_ENTRY
                 if isinstance(item, dict):
                     # sub-block: store the position it was persisted at
                     return block_map[id(item)]
                 # anything else is a revision number
                 return _transform_rev(item)
             def parse_data(data):
                 """parse nodemap data into a nodemap Trie

                 `data` is the persisted binary form of a nodemap: a sequence of blocks of
                 `S_BLOCK.size` bytes each.

                 Return the root `Block` of the trie, which is the last block of the data
                 since children are persisted before their parent. Empty data give an
                 empty `Block`.

                 Raise `error.Abort` when the size of `data` is not a multiple of the
                 block size.
                 """
                 if (len(data) % S_BLOCK.size) != 0:
                     # bytes, like every other user facing message of this module
                     msg = b"nodemap data size is not a multiple of block size (%d): %d"
                     raise error.Abort(msg % (S_BLOCK.size, len(data)))
                 if not data:
                     return Block()
                 block_map = {}
                 new_blocks = []
                 # first pass: create all blocks, so that an entry can refer to any of
                 # them whatever their relative position in the data
                 for i in range(0, len(data), S_BLOCK.size):
                     block = Block()
                     block.ondisk_id = len(block_map)
                     block_map[block.ondisk_id] = block
                     block_data = data[i : i + S_BLOCK.size]
                     values = S_BLOCK.unpack(block_data)
                     new_blocks.append((block, values))
                 # second pass: turn the on disk values into sub-blocks or revisions
                 for b, values in new_blocks:
                     for idx, v in enumerate(values):
                         if v == NO_ENTRY:
                             continue
                         elif v >= 0:
                             b[idx] = block_map[v]
                         else:
                             b[idx] = _transform_rev(v)
                 # name the root explicitly instead of relying on the variable left over
                 # by the first loop
                 root = new_blocks[-1][0]
                 return root
             # debug utility
             def check_data(ui, index, data):
                 """Check that nodemap data are valid for the given index

                 Every inconsistency is reported through `ui.write_err`. Return 0 when
                 the data match the index and 1 otherwise.
                 """
                 status = 0
                 ui.status((b"revision in index:   %d\n") % len(index))
                 trie = parse_data(data)
                 unseen = set(_all_revisions(trie))
                 ui.status((b"revision in nodemap: %d\n") % len(unseen))
                 for rev in range(len(index)):
                     # revisions are removed as they are matched, so that only the
                     # unexpected ones remain in the set at the end
                     if rev in unseen:
                         unseen.remove(rev)
                     else:
                         ui.write_err(b"  revision missing from nodemap: %d\n" % rev)
                         status = 1
                     found = _find_node(trie, nodemod.hex(index[rev][7]))
                     if found is None:
                         ui.write_err(
                             b"  revision node does not match any entries: %d\n" % rev
                         )
                         status = 1
                     elif found != rev:
                         ui.write_err(
                             b"  revision node does not match the expected revision: "
                             b"%d != %d\n" % (rev, found)
                         )
                         status = 1
                 if unseen:
                     for rev in sorted(unseen):
                         ui.write_err(b"  extra revision in  nodemap: %d\n" % rev)
                     status = 1
                 return status
             def _all_revisions(root):
                 """Yield every revision stored in a Trie"""
                 for node in _walk_trie(root):
                     for slot in node:
                         # empty slots and sub-blocks are not revisions
                         if slot is not None and not isinstance(slot, Block):
                             yield slot
             def _find_node(block, node):
                 """Look up the revision associated with a given node

                 The trie is followed one hexadecimal digit at a time. Return the entry
                 found once a non-block value is reached (None for an empty slot).
                 """
                 current = block
                 remaining = node
                 while True:
                     entry = current.get(_to_int(remaining[0:1]))
                     if not isinstance(entry, dict):
                         return entry
                     # sub-block: consume one digit and keep descending
                     current = entry
                     remaining = remaining[1:]