revlog: do not call Rust code if the index is not compatible with it...
marmoute - r48043:6be2a7ca default
@@ -1,3445 +1,3445 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
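
# Illustrative sketch (not part of upstream revlog.py): how offset_type()
# packs a data-file offset and the per-revision flags into the single
# integer stored as index entry field [0]. The offset value below is made up.
#
#   packed = offset_type(4096, REVIDX_ISCENSORED)
#   assert packed >> 16 == 4096                   # byte offset of the chunk
#   assert packed & 0xFFFF == REVIDX_ISCENSORED   # 16-bit flag field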


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries", tuple
    objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items (sometimes
    not having them at all), but the tuple will always be the same. New fields
    are usually added at the end to avoid breaking existing code that relies
    on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

        flags:
            A flag field that carries special information or changes the behavior
            of the revision. (see `REVIDX_*` constants for details)
            The flag field only occupies the first 16 bits of this field,
            use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exist for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance measurement code might not
        set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
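
    # Illustrative sketch (not part of upstream revlog.py): the opener options
    # read above come from the repository configuration; a caller wiring a
    # revlog by hand might provide them like this. The concrete values are
    # hypothetical, only the option keys appear in _init_opts() above.
    #
    #   opener.options = {
    #       b'revlogv1': True,
    #       b'generaldelta': True,
    #       b'sparse-revlog': True,
    #       b'chunkcachesize': 65536,
    #   }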

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}
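
    # Illustrative sketch (not part of upstream revlog.py): how the 32-bit
    # index header parsed in _loadindex() splits into format flags and format
    # version. The header value below is made up for the example.
    #
    #   header = 0x00010001                # FLAG_INLINE_DATA | REVLOGV1
    #   format_version = header & 0xFFFF   # -> 1 (REVLOGV1)
    #   format_flags = header & ~0xFFFF    # -> 0x00010000 (FLAG_INLINE_DATA)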

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
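
    # Illustrative sketch (not part of upstream revlog.py): typical use of the
    # context manager above when reading a raw segment. `start` and `length`
    # are hypothetical values chosen for the example.
    #
    #   with self._datareadfp() as fp:
    #       fp.seek(start)
    #       chunk = fp.read(length)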

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap
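
    # Illustrative sketch (not part of upstream revlog.py): the index-based
    # replacements for the deprecated nodemap accessors above, as named in the
    # deprecation messages. `rl` is a revlog instance and `node` a binary
    # node id, both assumptions for the example.
    #
    #   rl.index.has_node(node)   # membership test
    #   rl.index.rev(node)        # node -> rev, raises on unknown nodes
    #   rl.index.get_rev(node)    # node -> rev, returns None on unknown nodes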
824
824
825 def hasnode(self, node):
825 def hasnode(self, node):
826 try:
826 try:
827 self.rev(node)
827 self.rev(node)
828 return True
828 return True
829 except KeyError:
829 except KeyError:
830 return False
830 return False
831
831
832 def candelta(self, baserev, rev):
832 def candelta(self, baserev, rev):
833 """whether two revisions (baserev, rev) can be delta-ed or not"""
833 """whether two revisions (baserev, rev) can be delta-ed or not"""
834 # Disable delta if either rev requires a content-changing flag
834 # Disable delta if either rev requires a content-changing flag
835 # processor (ex. LFS). This is because such flag processor can alter
835 # processor (ex. LFS). This is because such flag processor can alter
836 # the rawtext content that the delta will be based on, and two clients
836 # the rawtext content that the delta will be based on, and two clients
837 # could have a same revlog node with different flags (i.e. different
837 # could have a same revlog node with different flags (i.e. different
838 # rawtext contents) and the delta could be incompatible.
838 # rawtext contents) and the delta could be incompatible.
839 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
839 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
840 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
840 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
841 ):
841 ):
842 return False
842 return False
843 return True
843 return True
844
844
845 def update_caches(self, transaction):
845 def update_caches(self, transaction):
846 if self._nodemap_file is not None:
846 if self._nodemap_file is not None:
847 if transaction is None:
847 if transaction is None:
848 nodemaputil.update_persistent_nodemap(self)
848 nodemaputil.update_persistent_nodemap(self)
849 else:
849 else:
850 nodemaputil.setup_persistent_nodemap(transaction, self)
850 nodemaputil.setup_persistent_nodemap(transaction, self)
851
851
852 def clearcaches(self):
852 def clearcaches(self):
853 self._revisioncache = None
853 self._revisioncache = None
854 self._chainbasecache.clear()
854 self._chainbasecache.clear()
855 self._chunkcache = (0, b'')
855 self._chunkcache = (0, b'')
856 self._pcache = {}
856 self._pcache = {}
857 self._nodemap_docket = None
857 self._nodemap_docket = None
858 self.index.clearcaches()
858 self.index.clearcaches()
859 # The python code is the one responsible for validating the docket, we
859 # The python code is the one responsible for validating the docket, we
860 # end up having to refresh it here.
860 # end up having to refresh it here.
861 use_nodemap = (
861 use_nodemap = (
862 not self._inline
862 not self._inline
863 and self._nodemap_file is not None
863 and self._nodemap_file is not None
864 and util.safehasattr(self.index, 'update_nodemap_data')
864 and util.safehasattr(self.index, 'update_nodemap_data')
865 )
865 )
866 if use_nodemap:
866 if use_nodemap:
867 nodemap_data = nodemaputil.persisted_data(self)
867 nodemap_data = nodemaputil.persisted_data(self)
868 if nodemap_data is not None:
868 if nodemap_data is not None:
869 self._nodemap_docket = nodemap_data[0]
869 self._nodemap_docket = nodemap_data[0]
870 self.index.update_nodemap_data(*nodemap_data)
870 self.index.update_nodemap_data(*nodemap_data)
871
871
872 def rev(self, node):
872 def rev(self, node):
873 try:
873 try:
874 return self.index.rev(node)
874 return self.index.rev(node)
875 except TypeError:
875 except TypeError:
876 raise
876 raise
877 except error.RevlogError:
877 except error.RevlogError:
878 # parsers.c radix tree lookup failed
878 # parsers.c radix tree lookup failed
879 if (
879 if (
880 node == self.nodeconstants.wdirid
880 node == self.nodeconstants.wdirid
881 or node in self.nodeconstants.wdirfilenodeids
881 or node in self.nodeconstants.wdirfilenodeids
882 ):
882 ):
883 raise error.WdirUnsupported
883 raise error.WdirUnsupported
884 raise error.LookupError(node, self.display_id, _(b'no node'))
884 raise error.LookupError(node, self.display_id, _(b'no node'))
885
885
886 # Accessors for index entries.
886 # Accessors for index entries.
887
887
888 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
888 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
889 # are flags.
889 # are flags.
890 def start(self, rev):
890 def start(self, rev):
891 return int(self.index[rev][0] >> 16)
891 return int(self.index[rev][0] >> 16)
892
892
893 def flags(self, rev):
893 def flags(self, rev):
894 return self.index[rev][0] & 0xFFFF
894 return self.index[rev][0] & 0xFFFF
895
895
896 def length(self, rev):
896 def length(self, rev):
897 return self.index[rev][1]
897 return self.index[rev][1]
898
898
899 def sidedata_length(self, rev):
899 def sidedata_length(self, rev):
900 if not self.hassidedata:
900 if not self.hassidedata:
901 return 0
901 return 0
902 return self.index[rev][9]
902 return self.index[rev][9]
903
903
904 def rawsize(self, rev):
904 def rawsize(self, rev):
905 """return the length of the uncompressed text for a given revision"""
905 """return the length of the uncompressed text for a given revision"""
906 l = self.index[rev][2]
906 l = self.index[rev][2]
907 if l >= 0:
907 if l >= 0:
908 return l
908 return l
909
909
910 t = self.rawdata(rev)
910 t = self.rawdata(rev)
911 return len(t)
911 return len(t)
912
912
913 def size(self, rev):
913 def size(self, rev):
914 """length of non-raw text (processed by a "read" flag processor)"""
914 """length of non-raw text (processed by a "read" flag processor)"""
915 # fast path: if no "read" flag processor could change the content,
915 # fast path: if no "read" flag processor could change the content,
916 # size is rawsize. note: ELLIPSIS is known to not change the content.
916 # size is rawsize. note: ELLIPSIS is known to not change the content.
917 flags = self.flags(rev)
917 flags = self.flags(rev)
918 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
918 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
919 return self.rawsize(rev)
919 return self.rawsize(rev)
920
920
921 return len(self.revision(rev, raw=False))
921 return len(self.revision(rev, raw=False))
922
922
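# Editor's note: a small illustration (with made-up flag values) of the fast
# path test in size() above: clear the ELLIPSIS bit out of the set of known
# flags and check whether any remaining known flag is set on the revision.
REVIDX_ELLIPSIS_DEMO = 1 << 2
REVIDX_EXTSTORED_DEMO = 1 << 3
KNOWN_FLAGS_DEMO = REVIDX_ELLIPSIS_DEMO | REVIDX_EXTSTORED_DEMO

def can_use_rawsize(flags):
    # True when only ELLIPSIS (or nothing) is set among the known flags
    return flags & (KNOWN_FLAGS_DEMO ^ REVIDX_ELLIPSIS_DEMO) == 0

assert can_use_rawsize(0)
assert can_use_rawsize(REVIDX_ELLIPSIS_DEMO)
assert not can_use_rawsize(REVIDX_EXTSTORED_DEMO)
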
923 def chainbase(self, rev):
923 def chainbase(self, rev):
924 base = self._chainbasecache.get(rev)
924 base = self._chainbasecache.get(rev)
925 if base is not None:
925 if base is not None:
926 return base
926 return base
927
927
928 index = self.index
928 index = self.index
929 iterrev = rev
929 iterrev = rev
930 base = index[iterrev][3]
930 base = index[iterrev][3]
931 while base != iterrev:
931 while base != iterrev:
932 iterrev = base
932 iterrev = base
933 base = index[iterrev][3]
933 base = index[iterrev][3]
934
934
935 self._chainbasecache[rev] = base
935 self._chainbasecache[rev] = base
936 return base
936 return base
937
937
938 def linkrev(self, rev):
938 def linkrev(self, rev):
939 return self.index[rev][4]
939 return self.index[rev][4]
940
940
941 def parentrevs(self, rev):
941 def parentrevs(self, rev):
942 try:
942 try:
943 entry = self.index[rev]
943 entry = self.index[rev]
944 except IndexError:
944 except IndexError:
945 if rev == wdirrev:
945 if rev == wdirrev:
946 raise error.WdirUnsupported
946 raise error.WdirUnsupported
947 raise
947 raise
948 if entry[5] == nullrev:
948 if entry[5] == nullrev:
949 return entry[6], entry[5]
949 return entry[6], entry[5]
950 else:
950 else:
951 return entry[5], entry[6]
951 return entry[5], entry[6]
952
952
953 # fast parentrevs(rev) where rev isn't filtered
953 # fast parentrevs(rev) where rev isn't filtered
954 _uncheckedparentrevs = parentrevs
954 _uncheckedparentrevs = parentrevs
955
955
956 def node(self, rev):
956 def node(self, rev):
957 try:
957 try:
958 return self.index[rev][7]
958 return self.index[rev][7]
959 except IndexError:
959 except IndexError:
960 if rev == wdirrev:
960 if rev == wdirrev:
961 raise error.WdirUnsupported
961 raise error.WdirUnsupported
962 raise
962 raise
963
963
964 # Derived from index values.
964 # Derived from index values.
965
965
966 def end(self, rev):
966 def end(self, rev):
967 return self.start(rev) + self.length(rev)
967 return self.start(rev) + self.length(rev)
968
968
969 def parents(self, node):
969 def parents(self, node):
970 i = self.index
970 i = self.index
971 d = i[self.rev(node)]
971 d = i[self.rev(node)]
972 # inline node() to avoid function call overhead
972 # inline node() to avoid function call overhead
973 if d[5] == self.nullid:
973 if d[5] == self.nullid:
974 return i[d[6]][7], i[d[5]][7]
974 return i[d[6]][7], i[d[5]][7]
975 else:
975 else:
976 return i[d[5]][7], i[d[6]][7]
976 return i[d[5]][7], i[d[6]][7]
977
977
978 def chainlen(self, rev):
978 def chainlen(self, rev):
979 return self._chaininfo(rev)[0]
979 return self._chaininfo(rev)[0]
980
980
981 def _chaininfo(self, rev):
981 def _chaininfo(self, rev):
982 chaininfocache = self._chaininfocache
982 chaininfocache = self._chaininfocache
983 if rev in chaininfocache:
983 if rev in chaininfocache:
984 return chaininfocache[rev]
984 return chaininfocache[rev]
985 index = self.index
985 index = self.index
986 generaldelta = self._generaldelta
986 generaldelta = self._generaldelta
987 iterrev = rev
987 iterrev = rev
988 e = index[iterrev]
988 e = index[iterrev]
989 clen = 0
989 clen = 0
990 compresseddeltalen = 0
990 compresseddeltalen = 0
991 while iterrev != e[3]:
991 while iterrev != e[3]:
992 clen += 1
992 clen += 1
993 compresseddeltalen += e[1]
993 compresseddeltalen += e[1]
994 if generaldelta:
994 if generaldelta:
995 iterrev = e[3]
995 iterrev = e[3]
996 else:
996 else:
997 iterrev -= 1
997 iterrev -= 1
998 if iterrev in chaininfocache:
998 if iterrev in chaininfocache:
999 t = chaininfocache[iterrev]
999 t = chaininfocache[iterrev]
1000 clen += t[0]
1000 clen += t[0]
1001 compresseddeltalen += t[1]
1001 compresseddeltalen += t[1]
1002 break
1002 break
1003 e = index[iterrev]
1003 e = index[iterrev]
1004 else:
1004 else:
1005 # Add text length of base since decompressing that also takes
1005 # Add text length of base since decompressing that also takes
1006 # work. For cache hits the length is already included.
1006 # work. For cache hits the length is already included.
1007 compresseddeltalen += e[1]
1007 compresseddeltalen += e[1]
1008 r = (clen, compresseddeltalen)
1008 r = (clen, compresseddeltalen)
1009 chaininfocache[rev] = r
1009 chaininfocache[rev] = r
1010 return r
1010 return r
1011
1011
1012 def _deltachain(self, rev, stoprev=None):
1012 def _deltachain(self, rev, stoprev=None):
1013 """Obtain the delta chain for a revision.
1013 """Obtain the delta chain for a revision.
1014
1014
1015 ``stoprev`` specifies a revision to stop at. If not specified, we
1015 ``stoprev`` specifies a revision to stop at. If not specified, we
1016 stop at the base of the chain.
1016 stop at the base of the chain.
1017
1017
1018 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1018 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1019 revs in ascending order and ``stopped`` is a bool indicating whether
1019 revs in ascending order and ``stopped`` is a bool indicating whether
1020 ``stoprev`` was hit.
1020 ``stoprev`` was hit.
1021 """
1021 """
1022 # Try C implementation.
1022 # Try C implementation.
1023 try:
1023 try:
1024 return self.index.deltachain(rev, stoprev, self._generaldelta)
1024 return self.index.deltachain(rev, stoprev, self._generaldelta)
1025 except AttributeError:
1025 except AttributeError:
1026 pass
1026 pass
1027
1027
1028 chain = []
1028 chain = []
1029
1029
1030 # Alias to prevent attribute lookup in tight loop.
1030 # Alias to prevent attribute lookup in tight loop.
1031 index = self.index
1031 index = self.index
1032 generaldelta = self._generaldelta
1032 generaldelta = self._generaldelta
1033
1033
1034 iterrev = rev
1034 iterrev = rev
1035 e = index[iterrev]
1035 e = index[iterrev]
1036 while iterrev != e[3] and iterrev != stoprev:
1036 while iterrev != e[3] and iterrev != stoprev:
1037 chain.append(iterrev)
1037 chain.append(iterrev)
1038 if generaldelta:
1038 if generaldelta:
1039 iterrev = e[3]
1039 iterrev = e[3]
1040 else:
1040 else:
1041 iterrev -= 1
1041 iterrev -= 1
1042 e = index[iterrev]
1042 e = index[iterrev]
1043
1043
1044 if iterrev == stoprev:
1044 if iterrev == stoprev:
1045 stopped = True
1045 stopped = True
1046 else:
1046 else:
1047 chain.append(iterrev)
1047 chain.append(iterrev)
1048 stopped = False
1048 stopped = False
1049
1049
1050 chain.reverse()
1050 chain.reverse()
1051 return chain, stopped
1051 return chain, stopped
1052
1052
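# Editor's note: a self-contained sketch of the delta-chain walk performed by
# _deltachain() above, using a plain list of delta-base revisions instead of a
# real index entry. With general delta the next link is the stored base
# revision; without it, the base is implicitly the previous revision.
def delta_chain(bases, rev, generaldelta=True, stoprev=None):
    chain = []
    iterrev = rev
    base = bases[iterrev]
    while iterrev != base and iterrev != stoprev:
        chain.append(iterrev)
        iterrev = base if generaldelta else iterrev - 1
        base = bases[iterrev]
    stopped = iterrev == stoprev
    if not stopped:
        chain.append(iterrev)
    chain.reverse()
    return chain, stopped

# revs 0 and 1 are full snapshots (base == rev), 2 deltas against 0, 3 against 2
bases = [0, 1, 0, 2]
assert delta_chain(bases, 3) == ([0, 2, 3], False)
assert delta_chain(bases, 3, stoprev=2) == ([3], True)
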
1053 def ancestors(self, revs, stoprev=0, inclusive=False):
1053 def ancestors(self, revs, stoprev=0, inclusive=False):
1054 """Generate the ancestors of 'revs' in reverse revision order.
1054 """Generate the ancestors of 'revs' in reverse revision order.
1055 Does not generate revs lower than stoprev.
1055 Does not generate revs lower than stoprev.
1056
1056
1057 See the documentation for ancestor.lazyancestors for more details."""
1057 See the documentation for ancestor.lazyancestors for more details."""
1058
1058
1059 # first, make sure start revisions aren't filtered
1059 # first, make sure start revisions aren't filtered
1060 revs = list(revs)
1060 revs = list(revs)
1061 checkrev = self.node
1061 checkrev = self.node
1062 for r in revs:
1062 for r in revs:
1063 checkrev(r)
1063 checkrev(r)
1064 # and we're sure ancestors aren't filtered as well
1064 # and we're sure ancestors aren't filtered as well
1065
1065
1066 if rustancestor is not None:
1066 if rustancestor is not None and self.index.rust_ext_compat:
1067 lazyancestors = rustancestor.LazyAncestors
1067 lazyancestors = rustancestor.LazyAncestors
1068 arg = self.index
1068 arg = self.index
1069 else:
1069 else:
1070 lazyancestors = ancestor.lazyancestors
1070 lazyancestors = ancestor.lazyancestors
1071 arg = self._uncheckedparentrevs
1071 arg = self._uncheckedparentrevs
1072 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1072 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1073
1073
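# Editor's note: a hedged sketch of the dispatch pattern used above, which is
# the point of this changeset: the Rust ancestor iterator is only used when
# the Rust module is importable *and* the index object advertises
# compatibility via its rust_ext_compat attribute; otherwise the pure-Python
# walker gets a plain parentrevs(rev) callable. The helper and its arguments
# are placeholders, not part of revlog.py.
def pick_ancestor_impl(index, parentrevs_fn, rust_lazy_ancestors, py_lazy_ancestors):
    rust_ok = rust_lazy_ancestors is not None and getattr(
        index, 'rust_ext_compat', False
    )
    if rust_ok:
        # the Rust iterator consumes the index object directly
        return rust_lazy_ancestors, index
    # the Python iterator only needs a parentrevs(rev) callable
    return py_lazy_ancestors, parentrevs_fn

# with no Rust module available we always fall back to the pure-Python walker
impl, arg = pick_ancestor_impl(object(), lambda r: (-1, -1), None, 'python-impl')
assert impl == 'python-impl'
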
1074 def descendants(self, revs):
1074 def descendants(self, revs):
1075 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1075 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1076
1076
1077 def findcommonmissing(self, common=None, heads=None):
1077 def findcommonmissing(self, common=None, heads=None):
1078 """Return a tuple of the ancestors of common and the ancestors of heads
1078 """Return a tuple of the ancestors of common and the ancestors of heads
1079 that are not ancestors of common. In revset terminology, we return the
1079 that are not ancestors of common. In revset terminology, we return the
1080 tuple:
1080 tuple:
1081
1081
1082 ::common, (::heads) - (::common)
1082 ::common, (::heads) - (::common)
1083
1083
1084 The list is sorted by revision number, meaning it is
1084 The list is sorted by revision number, meaning it is
1085 topologically sorted.
1085 topologically sorted.
1086
1086
1087 'heads' and 'common' are both lists of node IDs. If heads is
1087 'heads' and 'common' are both lists of node IDs. If heads is
1088 not supplied, uses all of the revlog's heads. If common is not
1088 not supplied, uses all of the revlog's heads. If common is not
1089 supplied, uses nullid."""
1089 supplied, uses nullid."""
1090 if common is None:
1090 if common is None:
1091 common = [self.nullid]
1091 common = [self.nullid]
1092 if heads is None:
1092 if heads is None:
1093 heads = self.heads()
1093 heads = self.heads()
1094
1094
1095 common = [self.rev(n) for n in common]
1095 common = [self.rev(n) for n in common]
1096 heads = [self.rev(n) for n in heads]
1096 heads = [self.rev(n) for n in heads]
1097
1097
1098 # we want the ancestors, but inclusive
1098 # we want the ancestors, but inclusive
1099 class lazyset(object):
1099 class lazyset(object):
1100 def __init__(self, lazyvalues):
1100 def __init__(self, lazyvalues):
1101 self.addedvalues = set()
1101 self.addedvalues = set()
1102 self.lazyvalues = lazyvalues
1102 self.lazyvalues = lazyvalues
1103
1103
1104 def __contains__(self, value):
1104 def __contains__(self, value):
1105 return value in self.addedvalues or value in self.lazyvalues
1105 return value in self.addedvalues or value in self.lazyvalues
1106
1106
1107 def __iter__(self):
1107 def __iter__(self):
1108 added = self.addedvalues
1108 added = self.addedvalues
1109 for r in added:
1109 for r in added:
1110 yield r
1110 yield r
1111 for r in self.lazyvalues:
1111 for r in self.lazyvalues:
1112 if r not in added:
1112 if r not in added:
1113 yield r
1113 yield r
1114
1114
1115 def add(self, value):
1115 def add(self, value):
1116 self.addedvalues.add(value)
1116 self.addedvalues.add(value)
1117
1117
1118 def update(self, values):
1118 def update(self, values):
1119 self.addedvalues.update(values)
1119 self.addedvalues.update(values)
1120
1120
1121 has = lazyset(self.ancestors(common))
1121 has = lazyset(self.ancestors(common))
1122 has.add(nullrev)
1122 has.add(nullrev)
1123 has.update(common)
1123 has.update(common)
1124
1124
1125 # take all ancestors from heads that aren't in has
1125 # take all ancestors from heads that aren't in has
1126 missing = set()
1126 missing = set()
1127 visit = collections.deque(r for r in heads if r not in has)
1127 visit = collections.deque(r for r in heads if r not in has)
1128 while visit:
1128 while visit:
1129 r = visit.popleft()
1129 r = visit.popleft()
1130 if r in missing:
1130 if r in missing:
1131 continue
1131 continue
1132 else:
1132 else:
1133 missing.add(r)
1133 missing.add(r)
1134 for p in self.parentrevs(r):
1134 for p in self.parentrevs(r):
1135 if p not in has:
1135 if p not in has:
1136 visit.append(p)
1136 visit.append(p)
1137 missing = list(missing)
1137 missing = list(missing)
1138 missing.sort()
1138 missing.sort()
1139 return has, [self.node(miss) for miss in missing]
1139 return has, [self.node(miss) for miss in missing]
1140
1140
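# Editor's note: a standalone sketch of the "missing" computation done by
# findcommonmissing() above: a breadth-first walk from the heads that stops at
# anything already known to be an ancestor of common. Here parentrevs(rev)
# returns the two parent revisions, with -1 standing in for nullrev, and
# ``has`` already contains nullrev plus the common revisions.
import collections

def missing_revs(parentrevs, has, heads):
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parentrevs(r):
            if p not in has:
                visit.append(p)
    return sorted(missing)

# tiny linear history 0 <- 1 <- 2 <- 3 with common ancestors {0, 1}
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)}
assert missing_revs(lambda r: parents[r], {-1, 0, 1}, [3]) == [2, 3]
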
1141 def incrementalmissingrevs(self, common=None):
1141 def incrementalmissingrevs(self, common=None):
1142 """Return an object that can be used to incrementally compute the
1142 """Return an object that can be used to incrementally compute the
1143 revision numbers of the ancestors of arbitrary sets that are not
1143 revision numbers of the ancestors of arbitrary sets that are not
1144 ancestors of common. This is an ancestor.incrementalmissingancestors
1144 ancestors of common. This is an ancestor.incrementalmissingancestors
1145 object.
1145 object.
1146
1146
1147 'common' is a list of revision numbers. If common is not supplied, uses
1147 'common' is a list of revision numbers. If common is not supplied, uses
1148 nullrev.
1148 nullrev.
1149 """
1149 """
1150 if common is None:
1150 if common is None:
1151 common = [nullrev]
1151 common = [nullrev]
1152
1152
1153 if rustancestor is not None:
1153 if rustancestor is not None and self.index.rust_ext_compat:
1154 return rustancestor.MissingAncestors(self.index, common)
1154 return rustancestor.MissingAncestors(self.index, common)
1155 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1155 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1156
1156
1157 def findmissingrevs(self, common=None, heads=None):
1157 def findmissingrevs(self, common=None, heads=None):
1158 """Return the revision numbers of the ancestors of heads that
1158 """Return the revision numbers of the ancestors of heads that
1159 are not ancestors of common.
1159 are not ancestors of common.
1160
1160
1161 More specifically, return a list of revision numbers corresponding to
1161 More specifically, return a list of revision numbers corresponding to
1162 nodes N such that every N satisfies the following constraints:
1162 nodes N such that every N satisfies the following constraints:
1163
1163
1164 1. N is an ancestor of some node in 'heads'
1164 1. N is an ancestor of some node in 'heads'
1165 2. N is not an ancestor of any node in 'common'
1165 2. N is not an ancestor of any node in 'common'
1166
1166
1167 The list is sorted by revision number, meaning it is
1167 The list is sorted by revision number, meaning it is
1168 topologically sorted.
1168 topologically sorted.
1169
1169
1170 'heads' and 'common' are both lists of revision numbers. If heads is
1170 'heads' and 'common' are both lists of revision numbers. If heads is
1171 not supplied, uses all of the revlog's heads. If common is not
1171 not supplied, uses all of the revlog's heads. If common is not
1172 supplied, uses nullid."""
1172 supplied, uses nullid."""
1173 if common is None:
1173 if common is None:
1174 common = [nullrev]
1174 common = [nullrev]
1175 if heads is None:
1175 if heads is None:
1176 heads = self.headrevs()
1176 heads = self.headrevs()
1177
1177
1178 inc = self.incrementalmissingrevs(common=common)
1178 inc = self.incrementalmissingrevs(common=common)
1179 return inc.missingancestors(heads)
1179 return inc.missingancestors(heads)
1180
1180
1181 def findmissing(self, common=None, heads=None):
1181 def findmissing(self, common=None, heads=None):
1182 """Return the ancestors of heads that are not ancestors of common.
1182 """Return the ancestors of heads that are not ancestors of common.
1183
1183
1184 More specifically, return a list of nodes N such that every N
1184 More specifically, return a list of nodes N such that every N
1185 satisfies the following constraints:
1185 satisfies the following constraints:
1186
1186
1187 1. N is an ancestor of some node in 'heads'
1187 1. N is an ancestor of some node in 'heads'
1188 2. N is not an ancestor of any node in 'common'
1188 2. N is not an ancestor of any node in 'common'
1189
1189
1190 The list is sorted by revision number, meaning it is
1190 The list is sorted by revision number, meaning it is
1191 topologically sorted.
1191 topologically sorted.
1192
1192
1193 'heads' and 'common' are both lists of node IDs. If heads is
1193 'heads' and 'common' are both lists of node IDs. If heads is
1194 not supplied, uses all of the revlog's heads. If common is not
1194 not supplied, uses all of the revlog's heads. If common is not
1195 supplied, uses nullid."""
1195 supplied, uses nullid."""
1196 if common is None:
1196 if common is None:
1197 common = [self.nullid]
1197 common = [self.nullid]
1198 if heads is None:
1198 if heads is None:
1199 heads = self.heads()
1199 heads = self.heads()
1200
1200
1201 common = [self.rev(n) for n in common]
1201 common = [self.rev(n) for n in common]
1202 heads = [self.rev(n) for n in heads]
1202 heads = [self.rev(n) for n in heads]
1203
1203
1204 inc = self.incrementalmissingrevs(common=common)
1204 inc = self.incrementalmissingrevs(common=common)
1205 return [self.node(r) for r in inc.missingancestors(heads)]
1205 return [self.node(r) for r in inc.missingancestors(heads)]
1206
1206
1207 def nodesbetween(self, roots=None, heads=None):
1207 def nodesbetween(self, roots=None, heads=None):
1208 """Return a topological path from 'roots' to 'heads'.
1208 """Return a topological path from 'roots' to 'heads'.
1209
1209
1210 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1210 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1211 topologically sorted list of all nodes N that satisfy both of
1211 topologically sorted list of all nodes N that satisfy both of
1212 these constraints:
1212 these constraints:
1213
1213
1214 1. N is a descendant of some node in 'roots'
1214 1. N is a descendant of some node in 'roots'
1215 2. N is an ancestor of some node in 'heads'
1215 2. N is an ancestor of some node in 'heads'
1216
1216
1217 Every node is considered to be both a descendant and an ancestor
1217 Every node is considered to be both a descendant and an ancestor
1218 of itself, so every reachable node in 'roots' and 'heads' will be
1218 of itself, so every reachable node in 'roots' and 'heads' will be
1219 included in 'nodes'.
1219 included in 'nodes'.
1220
1220
1221 'outroots' is the list of reachable nodes in 'roots', i.e., the
1221 'outroots' is the list of reachable nodes in 'roots', i.e., the
1222 subset of 'roots' that is returned in 'nodes'. Likewise,
1222 subset of 'roots' that is returned in 'nodes'. Likewise,
1223 'outheads' is the subset of 'heads' that is also in 'nodes'.
1223 'outheads' is the subset of 'heads' that is also in 'nodes'.
1224
1224
1225 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1225 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1226 unspecified, uses nullid as the only root. If 'heads' is
1226 unspecified, uses nullid as the only root. If 'heads' is
1227 unspecified, uses list of all of the revlog's heads."""
1227 unspecified, uses list of all of the revlog's heads."""
1228 nonodes = ([], [], [])
1228 nonodes = ([], [], [])
1229 if roots is not None:
1229 if roots is not None:
1230 roots = list(roots)
1230 roots = list(roots)
1231 if not roots:
1231 if not roots:
1232 return nonodes
1232 return nonodes
1233 lowestrev = min([self.rev(n) for n in roots])
1233 lowestrev = min([self.rev(n) for n in roots])
1234 else:
1234 else:
1235 roots = [self.nullid] # Everybody's a descendant of nullid
1235 roots = [self.nullid] # Everybody's a descendant of nullid
1236 lowestrev = nullrev
1236 lowestrev = nullrev
1237 if (lowestrev == nullrev) and (heads is None):
1237 if (lowestrev == nullrev) and (heads is None):
1238 # We want _all_ the nodes!
1238 # We want _all_ the nodes!
1239 return (
1239 return (
1240 [self.node(r) for r in self],
1240 [self.node(r) for r in self],
1241 [self.nullid],
1241 [self.nullid],
1242 list(self.heads()),
1242 list(self.heads()),
1243 )
1243 )
1244 if heads is None:
1244 if heads is None:
1245 # All nodes are ancestors, so the latest ancestor is the last
1245 # All nodes are ancestors, so the latest ancestor is the last
1246 # node.
1246 # node.
1247 highestrev = len(self) - 1
1247 highestrev = len(self) - 1
1248 # Set ancestors to None to signal that every node is an ancestor.
1248 # Set ancestors to None to signal that every node is an ancestor.
1249 ancestors = None
1249 ancestors = None
1250 # Set heads to an empty dictionary for later discovery of heads
1250 # Set heads to an empty dictionary for later discovery of heads
1251 heads = {}
1251 heads = {}
1252 else:
1252 else:
1253 heads = list(heads)
1253 heads = list(heads)
1254 if not heads:
1254 if not heads:
1255 return nonodes
1255 return nonodes
1256 ancestors = set()
1256 ancestors = set()
1257 # Turn heads into a dictionary so we can remove 'fake' heads.
1257 # Turn heads into a dictionary so we can remove 'fake' heads.
1258 # Also, later we will be using it to filter out the heads we can't
1258 # Also, later we will be using it to filter out the heads we can't
1259 # find from roots.
1259 # find from roots.
1260 heads = dict.fromkeys(heads, False)
1260 heads = dict.fromkeys(heads, False)
1261 # Start at the top and keep marking parents until we're done.
1261 # Start at the top and keep marking parents until we're done.
1262 nodestotag = set(heads)
1262 nodestotag = set(heads)
1263 # Remember where the top was so we can use it as a limit later.
1263 # Remember where the top was so we can use it as a limit later.
1264 highestrev = max([self.rev(n) for n in nodestotag])
1264 highestrev = max([self.rev(n) for n in nodestotag])
1265 while nodestotag:
1265 while nodestotag:
1266 # grab a node to tag
1266 # grab a node to tag
1267 n = nodestotag.pop()
1267 n = nodestotag.pop()
1268 # Never tag nullid
1268 # Never tag nullid
1269 if n == self.nullid:
1269 if n == self.nullid:
1270 continue
1270 continue
1271 # A node's revision number represents its place in a
1271 # A node's revision number represents its place in a
1272 # topologically sorted list of nodes.
1272 # topologically sorted list of nodes.
1273 r = self.rev(n)
1273 r = self.rev(n)
1274 if r >= lowestrev:
1274 if r >= lowestrev:
1275 if n not in ancestors:
1275 if n not in ancestors:
1276 # If we are possibly a descendant of one of the roots
1276 # If we are possibly a descendant of one of the roots
1277 # and we haven't already been marked as an ancestor
1277 # and we haven't already been marked as an ancestor
1278 ancestors.add(n) # Mark as ancestor
1278 ancestors.add(n) # Mark as ancestor
1279 # Add non-nullid parents to list of nodes to tag.
1279 # Add non-nullid parents to list of nodes to tag.
1280 nodestotag.update(
1280 nodestotag.update(
1281 [p for p in self.parents(n) if p != self.nullid]
1281 [p for p in self.parents(n) if p != self.nullid]
1282 )
1282 )
1283 elif n in heads: # We've seen it before, is it a fake head?
1283 elif n in heads: # We've seen it before, is it a fake head?
1284 # So it is, real heads should not be the ancestors of
1284 # So it is, real heads should not be the ancestors of
1285 # any other heads.
1285 # any other heads.
1286 heads.pop(n)
1286 heads.pop(n)
1287 if not ancestors:
1287 if not ancestors:
1288 return nonodes
1288 return nonodes
1289 # Now that we have our set of ancestors, we want to remove any
1289 # Now that we have our set of ancestors, we want to remove any
1290 # roots that are not ancestors.
1290 # roots that are not ancestors.
1291
1291
1292 # If one of the roots was nullid, everything is included anyway.
1292 # If one of the roots was nullid, everything is included anyway.
1293 if lowestrev > nullrev:
1293 if lowestrev > nullrev:
1294 # But, since we weren't, let's recompute the lowest rev to not
1294 # But, since we weren't, let's recompute the lowest rev to not
1295 # include roots that aren't ancestors.
1295 # include roots that aren't ancestors.
1296
1296
1297 # Filter out roots that aren't ancestors of heads
1297 # Filter out roots that aren't ancestors of heads
1298 roots = [root for root in roots if root in ancestors]
1298 roots = [root for root in roots if root in ancestors]
1299 # Recompute the lowest revision
1299 # Recompute the lowest revision
1300 if roots:
1300 if roots:
1301 lowestrev = min([self.rev(root) for root in roots])
1301 lowestrev = min([self.rev(root) for root in roots])
1302 else:
1302 else:
1303 # No more roots? Return empty list
1303 # No more roots? Return empty list
1304 return nonodes
1304 return nonodes
1305 else:
1305 else:
1306 # We are descending from nullid, and don't need to care about
1306 # We are descending from nullid, and don't need to care about
1307 # any other roots.
1307 # any other roots.
1308 lowestrev = nullrev
1308 lowestrev = nullrev
1309 roots = [self.nullid]
1309 roots = [self.nullid]
1310 # Transform our roots list into a set.
1310 # Transform our roots list into a set.
1311 descendants = set(roots)
1311 descendants = set(roots)
1312 # Also, keep the original roots so we can filter out roots that aren't
1312 # Also, keep the original roots so we can filter out roots that aren't
1313 # 'real' roots (i.e. are descended from other roots).
1313 # 'real' roots (i.e. are descended from other roots).
1314 roots = descendants.copy()
1314 roots = descendants.copy()
1315 # Our topologically sorted list of output nodes.
1315 # Our topologically sorted list of output nodes.
1316 orderedout = []
1316 orderedout = []
1317 # Don't start at nullid since we don't want nullid in our output list,
1317 # Don't start at nullid since we don't want nullid in our output list,
1318 # and if nullid shows up in descendants, empty parents will look like
1318 # and if nullid shows up in descendants, empty parents will look like
1319 # they're descendants.
1319 # they're descendants.
1320 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1320 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1321 n = self.node(r)
1321 n = self.node(r)
1322 isdescendant = False
1322 isdescendant = False
1323 if lowestrev == nullrev: # Everybody is a descendant of nullid
1323 if lowestrev == nullrev: # Everybody is a descendant of nullid
1324 isdescendant = True
1324 isdescendant = True
1325 elif n in descendants:
1325 elif n in descendants:
1326 # n is already a descendant
1326 # n is already a descendant
1327 isdescendant = True
1327 isdescendant = True
1328 # This check only needs to be done here because all the roots
1328 # This check only needs to be done here because all the roots
1329 # will start being marked as descendants before the loop.
1329 # will start being marked as descendants before the loop.
1330 if n in roots:
1330 if n in roots:
1331 # If n was a root, check if it's a 'real' root.
1331 # If n was a root, check if it's a 'real' root.
1332 p = tuple(self.parents(n))
1332 p = tuple(self.parents(n))
1333 # If any of its parents are descendants, it's not a root.
1333 # If any of its parents are descendants, it's not a root.
1334 if (p[0] in descendants) or (p[1] in descendants):
1334 if (p[0] in descendants) or (p[1] in descendants):
1335 roots.remove(n)
1335 roots.remove(n)
1336 else:
1336 else:
1337 p = tuple(self.parents(n))
1337 p = tuple(self.parents(n))
1338 # A node is a descendant if either of its parents are
1338 # A node is a descendant if either of its parents are
1339 # descendants. (We seeded the descendants set with the roots
1339 # descendants. (We seeded the descendants set with the roots
1340 # up there, remember?)
1340 # up there, remember?)
1341 if (p[0] in descendants) or (p[1] in descendants):
1341 if (p[0] in descendants) or (p[1] in descendants):
1342 descendants.add(n)
1342 descendants.add(n)
1343 isdescendant = True
1343 isdescendant = True
1344 if isdescendant and ((ancestors is None) or (n in ancestors)):
1344 if isdescendant and ((ancestors is None) or (n in ancestors)):
1345 # Only include nodes that are both descendants and ancestors.
1345 # Only include nodes that are both descendants and ancestors.
1346 orderedout.append(n)
1346 orderedout.append(n)
1347 if (ancestors is not None) and (n in heads):
1347 if (ancestors is not None) and (n in heads):
1348 # We're trying to figure out which heads are reachable
1348 # We're trying to figure out which heads are reachable
1349 # from roots.
1349 # from roots.
1350 # Mark this head as having been reached
1350 # Mark this head as having been reached
1351 heads[n] = True
1351 heads[n] = True
1352 elif ancestors is None:
1352 elif ancestors is None:
1353 # Otherwise, we're trying to discover the heads.
1353 # Otherwise, we're trying to discover the heads.
1354 # Assume this is a head because if it isn't, the next step
1354 # Assume this is a head because if it isn't, the next step
1355 # will eventually remove it.
1355 # will eventually remove it.
1356 heads[n] = True
1356 heads[n] = True
1357 # But, obviously its parents aren't.
1357 # But, obviously its parents aren't.
1358 for p in self.parents(n):
1358 for p in self.parents(n):
1359 heads.pop(p, None)
1359 heads.pop(p, None)
1360 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1360 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1361 roots = list(roots)
1361 roots = list(roots)
1362 assert orderedout
1362 assert orderedout
1363 assert roots
1363 assert roots
1364 assert heads
1364 assert heads
1365 return (orderedout, roots, heads)
1365 return (orderedout, roots, heads)
1366
1366
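# Editor's note: a much-reduced sketch of what nodesbetween() computes: the
# revisions that are both descendants of some root and ancestors of some head,
# in topological (revision-number) order. It works on revision numbers with a
# parentrevs(rev) callable and ignores the bookkeeping the real method does to
# report which roots and heads survive the filtering.
def revs_between(parentrevs, nrevs, roots, heads):
    # ancestors of the heads, inclusive
    anc = set()
    stack = list(heads)
    while stack:
        r = stack.pop()
        if r in anc or r < 0:
            continue
        anc.add(r)
        stack.extend(parentrevs(r))
    # descendants of the roots, inclusive, walking in increasing rev order
    desc = set(roots)
    for r in range(min(roots), nrevs):
        if r in desc:
            continue
        if any(p in desc for p in parentrevs(r)):
            desc.add(r)
    return sorted(anc & desc)

# 0 <- 1, then 1 <- 2 and 1 <- 3; between root 1 and head 2 lies {1, 2}
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
assert revs_between(lambda r: parents[r], 4, [1], [2]) == [1, 2]
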
1367 def headrevs(self, revs=None):
1367 def headrevs(self, revs=None):
1368 if revs is None:
1368 if revs is None:
1369 try:
1369 try:
1370 return self.index.headrevs()
1370 return self.index.headrevs()
1371 except AttributeError:
1371 except AttributeError:
1372 return self._headrevs()
1372 return self._headrevs()
1373 if rustdagop is not None:
1373 if rustdagop is not None and self.index.rust_ext_compat:
1374 return rustdagop.headrevs(self.index, revs)
1374 return rustdagop.headrevs(self.index, revs)
1375 return dagop.headrevs(revs, self._uncheckedparentrevs)
1375 return dagop.headrevs(revs, self._uncheckedparentrevs)
1376
1376
1377 def computephases(self, roots):
1377 def computephases(self, roots):
1378 return self.index.computephasesmapsets(roots)
1378 return self.index.computephasesmapsets(roots)
1379
1379
1380 def _headrevs(self):
1380 def _headrevs(self):
1381 count = len(self)
1381 count = len(self)
1382 if not count:
1382 if not count:
1383 return [nullrev]
1383 return [nullrev]
1384 # we won't iterate over filtered revs, so nobody is a head at start
1384 # we won't iterate over filtered revs, so nobody is a head at start
1385 ishead = [0] * (count + 1)
1385 ishead = [0] * (count + 1)
1386 index = self.index
1386 index = self.index
1387 for r in self:
1387 for r in self:
1388 ishead[r] = 1 # I may be a head
1388 ishead[r] = 1 # I may be a head
1389 e = index[r]
1389 e = index[r]
1390 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1390 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1391 return [r for r, val in enumerate(ishead) if val]
1391 return [r for r, val in enumerate(ishead) if val]
1392
1392
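# Editor's note: a standalone version of the pure-Python head computation in
# _headrevs() above: every revision starts out as a potential head and is
# knocked out as soon as it shows up as somebody's parent. (The real code
# sizes its list count + 1 so that writes through nullrev land harmlessly in
# the extra slot; this sketch simply skips null parents instead.)
def head_revs(parentrevs, nrevs):
    if not nrevs:
        return [-1]  # nullrev is the only "head" of an empty revlog
    ishead = [True] * nrevs
    for r in range(nrevs):
        for p in parentrevs(r):
            if p >= 0:
                ishead[p] = False
    return [r for r in range(nrevs) if ishead[r]]

# 0 <- 1 <- 2 and 0 <- 3: the heads are 2 and 3
parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}
assert head_revs(lambda r: parents[r], 4) == [2, 3]
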
1393 def heads(self, start=None, stop=None):
1393 def heads(self, start=None, stop=None):
1394 """return the list of all nodes that have no children
1394 """return the list of all nodes that have no children
1395
1395
1396 if start is specified, only heads that are descendants of
1396 if start is specified, only heads that are descendants of
1397 start will be returned
1397 start will be returned
1398 if stop is specified, it will consider all the revs from stop
1398 if stop is specified, it will consider all the revs from stop
1399 as if they had no children
1399 as if they had no children
1400 """
1400 """
1401 if start is None and stop is None:
1401 if start is None and stop is None:
1402 if not len(self):
1402 if not len(self):
1403 return [self.nullid]
1403 return [self.nullid]
1404 return [self.node(r) for r in self.headrevs()]
1404 return [self.node(r) for r in self.headrevs()]
1405
1405
1406 if start is None:
1406 if start is None:
1407 start = nullrev
1407 start = nullrev
1408 else:
1408 else:
1409 start = self.rev(start)
1409 start = self.rev(start)
1410
1410
1411 stoprevs = {self.rev(n) for n in stop or []}
1411 stoprevs = {self.rev(n) for n in stop or []}
1412
1412
1413 revs = dagop.headrevssubset(
1413 revs = dagop.headrevssubset(
1414 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1414 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1415 )
1415 )
1416
1416
1417 return [self.node(rev) for rev in revs]
1417 return [self.node(rev) for rev in revs]
1418
1418
1419 def children(self, node):
1419 def children(self, node):
1420 """find the children of a given node"""
1420 """find the children of a given node"""
1421 c = []
1421 c = []
1422 p = self.rev(node)
1422 p = self.rev(node)
1423 for r in self.revs(start=p + 1):
1423 for r in self.revs(start=p + 1):
1424 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1424 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1425 if prevs:
1425 if prevs:
1426 for pr in prevs:
1426 for pr in prevs:
1427 if pr == p:
1427 if pr == p:
1428 c.append(self.node(r))
1428 c.append(self.node(r))
1429 elif p == nullrev:
1429 elif p == nullrev:
1430 c.append(self.node(r))
1430 c.append(self.node(r))
1431 return c
1431 return c
1432
1432
1433 def commonancestorsheads(self, a, b):
1433 def commonancestorsheads(self, a, b):
1434 """calculate all the heads of the common ancestors of nodes a and b"""
1434 """calculate all the heads of the common ancestors of nodes a and b"""
1435 a, b = self.rev(a), self.rev(b)
1435 a, b = self.rev(a), self.rev(b)
1436 ancs = self._commonancestorsheads(a, b)
1436 ancs = self._commonancestorsheads(a, b)
1437 return pycompat.maplist(self.node, ancs)
1437 return pycompat.maplist(self.node, ancs)
1438
1438
1439 def _commonancestorsheads(self, *revs):
1439 def _commonancestorsheads(self, *revs):
1440 """calculate all the heads of the common ancestors of revs"""
1440 """calculate all the heads of the common ancestors of revs"""
1441 try:
1441 try:
1442 ancs = self.index.commonancestorsheads(*revs)
1442 ancs = self.index.commonancestorsheads(*revs)
1443 except (AttributeError, OverflowError): # C implementation failed
1443 except (AttributeError, OverflowError): # C implementation failed
1444 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1444 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1445 return ancs
1445 return ancs
1446
1446
1447 def isancestor(self, a, b):
1447 def isancestor(self, a, b):
1448 """return True if node a is an ancestor of node b
1448 """return True if node a is an ancestor of node b
1449
1449
1450 A revision is considered an ancestor of itself."""
1450 A revision is considered an ancestor of itself."""
1451 a, b = self.rev(a), self.rev(b)
1451 a, b = self.rev(a), self.rev(b)
1452 return self.isancestorrev(a, b)
1452 return self.isancestorrev(a, b)
1453
1453
1454 def isancestorrev(self, a, b):
1454 def isancestorrev(self, a, b):
1455 """return True if revision a is an ancestor of revision b
1455 """return True if revision a is an ancestor of revision b
1456
1456
1457 A revision is considered an ancestor of itself.
1457 A revision is considered an ancestor of itself.
1458
1458
1459 The implementation of this is trivial but the use of
1459 The implementation of this is trivial but the use of
1460 reachableroots is not."""
1460 reachableroots is not."""
1461 if a == nullrev:
1461 if a == nullrev:
1462 return True
1462 return True
1463 elif a == b:
1463 elif a == b:
1464 return True
1464 return True
1465 elif a > b:
1465 elif a > b:
1466 return False
1466 return False
1467 return bool(self.reachableroots(a, [b], [a], includepath=False))
1467 return bool(self.reachableroots(a, [b], [a], includepath=False))
1468
1468
1469 def reachableroots(self, minroot, heads, roots, includepath=False):
1469 def reachableroots(self, minroot, heads, roots, includepath=False):
1470 """return (heads(::(<roots> and <roots>::<heads>)))
1470 """return (heads(::(<roots> and <roots>::<heads>)))
1471
1471
1472 If includepath is True, return (<roots>::<heads>)."""
1472 If includepath is True, return (<roots>::<heads>)."""
1473 try:
1473 try:
1474 return self.index.reachableroots2(
1474 return self.index.reachableroots2(
1475 minroot, heads, roots, includepath
1475 minroot, heads, roots, includepath
1476 )
1476 )
1477 except AttributeError:
1477 except AttributeError:
1478 return dagop._reachablerootspure(
1478 return dagop._reachablerootspure(
1479 self.parentrevs, minroot, roots, heads, includepath
1479 self.parentrevs, minroot, roots, heads, includepath
1480 )
1480 )
1481
1481
1482 def ancestor(self, a, b):
1482 def ancestor(self, a, b):
1483 """calculate the "best" common ancestor of nodes a and b"""
1483 """calculate the "best" common ancestor of nodes a and b"""
1484
1484
1485 a, b = self.rev(a), self.rev(b)
1485 a, b = self.rev(a), self.rev(b)
1486 try:
1486 try:
1487 ancs = self.index.ancestors(a, b)
1487 ancs = self.index.ancestors(a, b)
1488 except (AttributeError, OverflowError):
1488 except (AttributeError, OverflowError):
1489 ancs = ancestor.ancestors(self.parentrevs, a, b)
1489 ancs = ancestor.ancestors(self.parentrevs, a, b)
1490 if ancs:
1490 if ancs:
1491 # choose a consistent winner when there's a tie
1491 # choose a consistent winner when there's a tie
1492 return min(map(self.node, ancs))
1492 return min(map(self.node, ancs))
1493 return self.nullid
1493 return self.nullid
1494
1494
1495 def _match(self, id):
1495 def _match(self, id):
1496 if isinstance(id, int):
1496 if isinstance(id, int):
1497 # rev
1497 # rev
1498 return self.node(id)
1498 return self.node(id)
1499 if len(id) == self.nodeconstants.nodelen:
1499 if len(id) == self.nodeconstants.nodelen:
1500 # possibly a binary node
1500 # possibly a binary node
1501 # odds of a binary node being all hex in ASCII are 1 in 10**25
1501 # odds of a binary node being all hex in ASCII are 1 in 10**25
1502 try:
1502 try:
1503 node = id
1503 node = id
1504 self.rev(node) # quick search the index
1504 self.rev(node) # quick search the index
1505 return node
1505 return node
1506 except error.LookupError:
1506 except error.LookupError:
1507 pass # may be partial hex id
1507 pass # may be partial hex id
1508 try:
1508 try:
1509 # str(rev)
1509 # str(rev)
1510 rev = int(id)
1510 rev = int(id)
1511 if b"%d" % rev != id:
1511 if b"%d" % rev != id:
1512 raise ValueError
1512 raise ValueError
1513 if rev < 0:
1513 if rev < 0:
1514 rev = len(self) + rev
1514 rev = len(self) + rev
1515 if rev < 0 or rev >= len(self):
1515 if rev < 0 or rev >= len(self):
1516 raise ValueError
1516 raise ValueError
1517 return self.node(rev)
1517 return self.node(rev)
1518 except (ValueError, OverflowError):
1518 except (ValueError, OverflowError):
1519 pass
1519 pass
1520 if len(id) == 2 * self.nodeconstants.nodelen:
1520 if len(id) == 2 * self.nodeconstants.nodelen:
1521 try:
1521 try:
1522 # a full hex nodeid?
1522 # a full hex nodeid?
1523 node = bin(id)
1523 node = bin(id)
1524 self.rev(node)
1524 self.rev(node)
1525 return node
1525 return node
1526 except (TypeError, error.LookupError):
1526 except (TypeError, error.LookupError):
1527 pass
1527 pass
1528
1528
1529 def _partialmatch(self, id):
1529 def _partialmatch(self, id):
1530 # we don't care about wdirfilenodeids as they should always be full hashes
1530 # we don't care about wdirfilenodeids as they should always be full hashes
1531 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1531 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1532 try:
1532 try:
1533 partial = self.index.partialmatch(id)
1533 partial = self.index.partialmatch(id)
1534 if partial and self.hasnode(partial):
1534 if partial and self.hasnode(partial):
1535 if maybewdir:
1535 if maybewdir:
1536 # single 'ff...' match in radix tree, ambiguous with wdir
1536 # single 'ff...' match in radix tree, ambiguous with wdir
1537 raise error.RevlogError
1537 raise error.RevlogError
1538 return partial
1538 return partial
1539 if maybewdir:
1539 if maybewdir:
1540 # no 'ff...' match in radix tree, wdir identified
1540 # no 'ff...' match in radix tree, wdir identified
1541 raise error.WdirUnsupported
1541 raise error.WdirUnsupported
1542 return None
1542 return None
1543 except error.RevlogError:
1543 except error.RevlogError:
1544 # parsers.c radix tree lookup gave multiple matches
1544 # parsers.c radix tree lookup gave multiple matches
1545 # fast path: for unfiltered changelog, radix tree is accurate
1545 # fast path: for unfiltered changelog, radix tree is accurate
1546 if not getattr(self, 'filteredrevs', None):
1546 if not getattr(self, 'filteredrevs', None):
1547 raise error.AmbiguousPrefixLookupError(
1547 raise error.AmbiguousPrefixLookupError(
1548 id, self.display_id, _(b'ambiguous identifier')
1548 id, self.display_id, _(b'ambiguous identifier')
1549 )
1549 )
1550 # fall through to slow path that filters hidden revisions
1550 # fall through to slow path that filters hidden revisions
1551 except (AttributeError, ValueError):
1551 except (AttributeError, ValueError):
1552 # we are pure python, or key was too short to search radix tree
1552 # we are pure python, or key was too short to search radix tree
1553 pass
1553 pass
1554
1554
1555 if id in self._pcache:
1555 if id in self._pcache:
1556 return self._pcache[id]
1556 return self._pcache[id]
1557
1557
1558 if len(id) <= 40:
1558 if len(id) <= 40:
1559 try:
1559 try:
1560 # hex(node)[:...]
1560 # hex(node)[:...]
1561 l = len(id) // 2 # grab an even number of digits
1561 l = len(id) // 2 # grab an even number of digits
1562 prefix = bin(id[: l * 2])
1562 prefix = bin(id[: l * 2])
1563 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1563 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1564 nl = [
1564 nl = [
1565 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1565 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1566 ]
1566 ]
1567 if self.nodeconstants.nullhex.startswith(id):
1567 if self.nodeconstants.nullhex.startswith(id):
1568 nl.append(self.nullid)
1568 nl.append(self.nullid)
1569 if len(nl) > 0:
1569 if len(nl) > 0:
1570 if len(nl) == 1 and not maybewdir:
1570 if len(nl) == 1 and not maybewdir:
1571 self._pcache[id] = nl[0]
1571 self._pcache[id] = nl[0]
1572 return nl[0]
1572 return nl[0]
1573 raise error.AmbiguousPrefixLookupError(
1573 raise error.AmbiguousPrefixLookupError(
1574 id, self.display_id, _(b'ambiguous identifier')
1574 id, self.display_id, _(b'ambiguous identifier')
1575 )
1575 )
1576 if maybewdir:
1576 if maybewdir:
1577 raise error.WdirUnsupported
1577 raise error.WdirUnsupported
1578 return None
1578 return None
1579 except TypeError:
1579 except TypeError:
1580 pass
1580 pass
1581
1581
1582 def lookup(self, id):
1582 def lookup(self, id):
1583 """locate a node based on:
1583 """locate a node based on:
1584 - revision number or str(revision number)
1584 - revision number or str(revision number)
1585 - nodeid or subset of hex nodeid
1585 - nodeid or subset of hex nodeid
1586 """
1586 """
1587 n = self._match(id)
1587 n = self._match(id)
1588 if n is not None:
1588 if n is not None:
1589 return n
1589 return n
1590 n = self._partialmatch(id)
1590 n = self._partialmatch(id)
1591 if n:
1591 if n:
1592 return n
1592 return n
1593
1593
1594 raise error.LookupError(id, self.display_id, _(b'no match found'))
1594 raise error.LookupError(id, self.display_id, _(b'no match found'))
1595
1595
1596 def shortest(self, node, minlength=1):
1596 def shortest(self, node, minlength=1):
1597 """Find the shortest unambiguous prefix that matches node."""
1597 """Find the shortest unambiguous prefix that matches node."""
1598
1598
1599 def isvalid(prefix):
1599 def isvalid(prefix):
1600 try:
1600 try:
1601 matchednode = self._partialmatch(prefix)
1601 matchednode = self._partialmatch(prefix)
1602 except error.AmbiguousPrefixLookupError:
1602 except error.AmbiguousPrefixLookupError:
1603 return False
1603 return False
1604 except error.WdirUnsupported:
1604 except error.WdirUnsupported:
1605 # single 'ff...' match
1605 # single 'ff...' match
1606 return True
1606 return True
1607 if matchednode is None:
1607 if matchednode is None:
1608 raise error.LookupError(node, self.display_id, _(b'no node'))
1608 raise error.LookupError(node, self.display_id, _(b'no node'))
1609 return True
1609 return True
1610
1610
1611 def maybewdir(prefix):
1611 def maybewdir(prefix):
1612 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1612 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1613
1613
1614 hexnode = hex(node)
1614 hexnode = hex(node)
1615
1615
1616 def disambiguate(hexnode, minlength):
1616 def disambiguate(hexnode, minlength):
1617 """Disambiguate against wdirid."""
1617 """Disambiguate against wdirid."""
1618 for length in range(minlength, len(hexnode) + 1):
1618 for length in range(minlength, len(hexnode) + 1):
1619 prefix = hexnode[:length]
1619 prefix = hexnode[:length]
1620 if not maybewdir(prefix):
1620 if not maybewdir(prefix):
1621 return prefix
1621 return prefix
1622
1622
1623 if not getattr(self, 'filteredrevs', None):
1623 if not getattr(self, 'filteredrevs', None):
1624 try:
1624 try:
1625 length = max(self.index.shortest(node), minlength)
1625 length = max(self.index.shortest(node), minlength)
1626 return disambiguate(hexnode, length)
1626 return disambiguate(hexnode, length)
1627 except error.RevlogError:
1627 except error.RevlogError:
1628 if node != self.nodeconstants.wdirid:
1628 if node != self.nodeconstants.wdirid:
1629 raise error.LookupError(
1629 raise error.LookupError(
1630 node, self.display_id, _(b'no node')
1630 node, self.display_id, _(b'no node')
1631 )
1631 )
1632 except AttributeError:
1632 except AttributeError:
1633 # Fall through to pure code
1633 # Fall through to pure code
1634 pass
1634 pass
1635
1635
1636 if node == self.nodeconstants.wdirid:
1636 if node == self.nodeconstants.wdirid:
1637 for length in range(minlength, len(hexnode) + 1):
1637 for length in range(minlength, len(hexnode) + 1):
1638 prefix = hexnode[:length]
1638 prefix = hexnode[:length]
1639 if isvalid(prefix):
1639 if isvalid(prefix):
1640 return prefix
1640 return prefix
1641
1641
1642 for length in range(minlength, len(hexnode) + 1):
1642 for length in range(minlength, len(hexnode) + 1):
1643 prefix = hexnode[:length]
1643 prefix = hexnode[:length]
1644 if isvalid(prefix):
1644 if isvalid(prefix):
1645 return disambiguate(hexnode, length)
1645 return disambiguate(hexnode, length)
1646
1646
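# Editor's note: a toy version of the prefix-disambiguation idea behind
# shortest() and _partialmatch() above, operating on a plain set of hex node
# ids instead of the index's radix tree, and ignoring the working-directory
# ('fff...') and hidden-revision special cases handled by the real code.
def shortest_prefix(all_hex_nodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [n for n in all_hex_nodes if n.startswith(prefix)]
        if matches == [hexnode]:
            return prefix
    return hexnode

nodes = {'1a2b3c', '1a9f00', 'ff00aa'}
assert shortest_prefix(nodes, '1a2b3c') == '1a2'
assert shortest_prefix(nodes, 'ff00aa') == 'f'
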
1647 def cmp(self, node, text):
1647 def cmp(self, node, text):
1648 """compare text with a given file revision
1648 """compare text with a given file revision
1649
1649
1650 returns True if text is different than what is stored.
1650 returns True if text is different than what is stored.
1651 """
1651 """
1652 p1, p2 = self.parents(node)
1652 p1, p2 = self.parents(node)
1653 return storageutil.hashrevisionsha1(text, p1, p2) != node
1653 return storageutil.hashrevisionsha1(text, p1, p2) != node
1654
1654
1655 def _cachesegment(self, offset, data):
1655 def _cachesegment(self, offset, data):
1656 """Add a segment to the revlog cache.
1656 """Add a segment to the revlog cache.
1657
1657
1658 Accepts an absolute offset and the data that is at that location.
1658 Accepts an absolute offset and the data that is at that location.
1659 """
1659 """
1660 o, d = self._chunkcache
1660 o, d = self._chunkcache
1661 # try to add to existing cache
1661 # try to add to existing cache
1662 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1662 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1663 self._chunkcache = o, d + data
1663 self._chunkcache = o, d + data
1664 else:
1664 else:
1665 self._chunkcache = offset, data
1665 self._chunkcache = offset, data
1666
1666
1667 def _readsegment(self, offset, length, df=None):
1667 def _readsegment(self, offset, length, df=None):
1668 """Load a segment of raw data from the revlog.
1668 """Load a segment of raw data from the revlog.
1669
1669
1670 Accepts an absolute offset, length to read, and an optional existing
1670 Accepts an absolute offset, length to read, and an optional existing
1671 file handle to read from.
1671 file handle to read from.
1672
1672
1673 If an existing file handle is passed, it will be seeked and the
1673 If an existing file handle is passed, it will be seeked and the
1674 original seek position will NOT be restored.
1674 original seek position will NOT be restored.
1675
1675
1676 Returns a str or buffer of raw byte data.
1676 Returns a str or buffer of raw byte data.
1677
1677
1678 Raises if the requested number of bytes could not be read.
1678 Raises if the requested number of bytes could not be read.
1679 """
1679 """
1680 # Cache data both forward and backward around the requested
1680 # Cache data both forward and backward around the requested
1681 # data, in a fixed size window. This helps speed up operations
1681 # data, in a fixed size window. This helps speed up operations
1682 # involving reading the revlog backwards.
1682 # involving reading the revlog backwards.
1683 cachesize = self._chunkcachesize
1683 cachesize = self._chunkcachesize
1684 realoffset = offset & ~(cachesize - 1)
1684 realoffset = offset & ~(cachesize - 1)
1685 reallength = (
1685 reallength = (
1686 (offset + length + cachesize) & ~(cachesize - 1)
1686 (offset + length + cachesize) & ~(cachesize - 1)
1687 ) - realoffset
1687 ) - realoffset
1688 with self._datareadfp(df) as df:
1688 with self._datareadfp(df) as df:
1689 df.seek(realoffset)
1689 df.seek(realoffset)
1690 d = df.read(reallength)
1690 d = df.read(reallength)
1691
1691
1692 self._cachesegment(realoffset, d)
1692 self._cachesegment(realoffset, d)
1693 if offset != realoffset or reallength != length:
1693 if offset != realoffset or reallength != length:
1694 startoffset = offset - realoffset
1694 startoffset = offset - realoffset
1695 if len(d) - startoffset < length:
1695 if len(d) - startoffset < length:
1696 raise error.RevlogError(
1696 raise error.RevlogError(
1697 _(
1697 _(
1698 b'partial read of revlog %s; expected %d bytes from '
1698 b'partial read of revlog %s; expected %d bytes from '
1699 b'offset %d, got %d'
1699 b'offset %d, got %d'
1700 )
1700 )
1701 % (
1701 % (
1702 self._indexfile if self._inline else self._datafile,
1702 self._indexfile if self._inline else self._datafile,
1703 length,
1703 length,
1704 offset,
1704 offset,
1705 len(d) - startoffset,
1705 len(d) - startoffset,
1706 )
1706 )
1707 )
1707 )
1708
1708
1709 return util.buffer(d, startoffset, length)
1709 return util.buffer(d, startoffset, length)
1710
1710
1711 if len(d) < length:
1711 if len(d) < length:
1712 raise error.RevlogError(
1712 raise error.RevlogError(
1713 _(
1713 _(
1714 b'partial read of revlog %s; expected %d bytes from offset '
1714 b'partial read of revlog %s; expected %d bytes from offset '
1715 b'%d, got %d'
1715 b'%d, got %d'
1716 )
1716 )
1717 % (
1717 % (
1718 self._indexfile if self._inline else self._datafile,
1718 self._indexfile if self._inline else self._datafile,
1719 length,
1719 length,
1720 offset,
1720 offset,
1721 len(d),
1721 len(d),
1722 )
1722 )
1723 )
1723 )
1724
1724
1725 return d
1725 return d
1726
1726
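# Editor's note: a small demonstration of the window-alignment arithmetic used
# by _readsegment() above: reads are widened to chunk-cache-size boundaries
# (the cache size must be a power of two) so that neighbouring requests can be
# served from the cached segment.
def aligned_window(offset, length, cachesize):
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# a 10-byte read at offset 100 with a 64-byte cache granularity
realoffset, reallength = aligned_window(100, 10, 64)
assert (realoffset, reallength) == (64, 64)
# the widened window always covers the requested range
assert realoffset <= 100 and realoffset + reallength >= 100 + 10
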
1727 def _getsegment(self, offset, length, df=None):
1727 def _getsegment(self, offset, length, df=None):
1728 """Obtain a segment of raw data from the revlog.
1728 """Obtain a segment of raw data from the revlog.
1729
1729
1730 Accepts an absolute offset, length of bytes to obtain, and an
1730 Accepts an absolute offset, length of bytes to obtain, and an
1731 optional file handle to the already-opened revlog. If the file
1731 optional file handle to the already-opened revlog. If the file
1732 handle is used, its original seek position will not be preserved.
1732 handle is used, its original seek position will not be preserved.
1733
1733
1734 Requests for data may be returned from a cache.
1734 Requests for data may be returned from a cache.
1735
1735
1736 Returns a str or a buffer instance of raw byte data.
1736 Returns a str or a buffer instance of raw byte data.
1737 """
1737 """
1738 o, d = self._chunkcache
1738 o, d = self._chunkcache
1739 l = len(d)
1739 l = len(d)
1740
1740
1741 # is it in the cache?
1741 # is it in the cache?
1742 cachestart = offset - o
1742 cachestart = offset - o
1743 cacheend = cachestart + length
1743 cacheend = cachestart + length
1744 if cachestart >= 0 and cacheend <= l:
1744 if cachestart >= 0 and cacheend <= l:
1745 if cachestart == 0 and cacheend == l:
1745 if cachestart == 0 and cacheend == l:
1746 return d # avoid a copy
1746 return d # avoid a copy
1747 return util.buffer(d, cachestart, cacheend - cachestart)
1747 return util.buffer(d, cachestart, cacheend - cachestart)
1748
1748
1749 return self._readsegment(offset, length, df=df)
1749 return self._readsegment(offset, length, df=df)
1750
1750
1751 def _getsegmentforrevs(self, startrev, endrev, df=None):
1751 def _getsegmentforrevs(self, startrev, endrev, df=None):
1752 """Obtain a segment of raw data corresponding to a range of revisions.
1752 """Obtain a segment of raw data corresponding to a range of revisions.
1753
1753
1754 Accepts the start and end revisions and an optional already-open
1754 Accepts the start and end revisions and an optional already-open
1755 file handle to be used for reading. If the file handle is used, its
1755 file handle to be used for reading. If the file handle is used, its
1756 seek position will not be preserved.
1756 seek position will not be preserved.
1757
1757
1758 Requests for data may be satisfied by a cache.
1758 Requests for data may be satisfied by a cache.
1759
1759
1760 Returns a 2-tuple of (offset, data) for the requested range of
1760 Returns a 2-tuple of (offset, data) for the requested range of
1761 revisions. Offset is the integer offset from the beginning of the
1761 revisions. Offset is the integer offset from the beginning of the
1762 revlog and data is a str or buffer of the raw byte data.
1762 revlog and data is a str or buffer of the raw byte data.
1763
1763
1764 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1764 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1765 to determine where each revision's data begins and ends.
1765 to determine where each revision's data begins and ends.
1766 """
1766 """
1767 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1767 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1768 # (functions are expensive).
1768 # (functions are expensive).
1769 index = self.index
1769 index = self.index
1770 istart = index[startrev]
1770 istart = index[startrev]
1771 start = int(istart[0] >> 16)
1771 start = int(istart[0] >> 16)
1772 if startrev == endrev:
1772 if startrev == endrev:
1773 end = start + istart[1]
1773 end = start + istart[1]
1774 else:
1774 else:
1775 iend = index[endrev]
1775 iend = index[endrev]
1776 end = int(iend[0] >> 16) + iend[1]
1776 end = int(iend[0] >> 16) + iend[1]
1777
1777
1778 if self._inline:
1778 if self._inline:
1779 start += (startrev + 1) * self.index.entry_size
1779 start += (startrev + 1) * self.index.entry_size
1780 end += (endrev + 1) * self.index.entry_size
1780 end += (endrev + 1) * self.index.entry_size
1781 length = end - start
1781 length = end - start
1782
1782
1783 return start, self._getsegment(start, length, df=df)
1783 return start, self._getsegment(start, length, df=df)
1784
1784
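# Editor's note: a sketch of the offset arithmetic in _getsegmentforrevs()
# above. In an inline revlog the index entries and the revision data are
# interleaved in a single file, so a revision's physical position is its
# logical data offset plus the size of every index entry written up to and
# including its own.
def physical_range(logical_start, logical_end, startrev, endrev, inline,
                   entry_size):
    start, end = logical_start, logical_end
    if inline:
        start += (startrev + 1) * entry_size
        end += (endrev + 1) * entry_size
    return start, end - start

# rev 0 of an inline revlog with 64-byte index entries and 20 bytes of data:
# its data begins right after its own index entry
assert physical_range(0, 20, 0, 0, True, 64) == (64, 20)
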
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = 'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

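    # The index entry tuples consulted here (``self.index[rev]``) are laid out
    # as built in ``_addrevision()`` further down: roughly (offset/flags,
    # deltalen, rawsize, delta base, link, p1 rev, p2 rev, node, sidedata
    # offset, sidedata size, compression mode, sidedata compression mode),
    # which is why ``[10]`` is read as the chunk compression mode and ``[11]``
    # as the sidedata compression mode.
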
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

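    # Example (sketch): for a sparse revlog, a snapshot revision's depth counts
    # the snapshots stored earlier in its own delta chain, so assuming ``r`` is
    # known to be a snapshot:
    #
    #     if self.issnapshot(r):
    #         depth = self.snapshotdepth(r)  # snapshots before ``r`` in the chain
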
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        comp_segment = self._getsegment(sidedata_offset, sidedata_size)
        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

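    # Note (informal): ``storageutil.hashrevisionsha1`` essentially computes a
    # SHA-1 over the two parent nodes (in a canonical order) followed by the
    # revision text; ``checkhash()`` below recomputes this to verify integrity.
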
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

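    # Example (sketch): write access is expected to be wrapped in the
    # ``_writing`` context manager, the way ``addrawrevision()`` below does it:
    #
    #     with self._writing(transaction):
    #         self._addrevision(
    #             node, rawtext, transaction, link, p1, p2, flags, cachedelta
    #         )
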
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

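    # Example (sketch, assuming ``rl`` names a revlog instance, ``tr`` an open
    # transaction and ``text`` the new revision's full text): a typical caller
    # only provides the parents and the linkrev and lets the revlog compute
    # the node:
    #
    #     rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #     node = rl.node(rev)
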
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

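    # Example (sketch): ``compress()`` and ``decompress()`` agree on a one-byte
    # header: ``b'u'`` marks data stored uncompressed, ``b'\0'`` (or an empty
    # header) marks data that needs no marker, and anything else (e.g. ``b'x'``
    # for zlib) selects a decompressor. So, roughly, for any chunk ``data``:
    #
    #     header, stored = self.compress(data)
    #     assert bytes(self.decompress(header + stored)) == data
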
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare them uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this O(1), revlog v2 needs a docket
        file to store that information: since sidedata can be rewritten to the
        end of the data file within a transaction, you can have cases where, for
        example, rev `n` does not have sidedata while rev `n - 1` does, leading
        to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
2780 finally:
2780 finally:
2781 self._adding_group = False
2781 self._adding_group = False
2782 return not empty
2782 return not empty
2783
2783
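# Illustrative sketch only (not part of this changeset): addgroup() consumes
# an iterable of 8-tuples matching the unpacking above, plus a linkmapper
# callback that turns the changelog node carried with each delta into a local
# link revision.  A hypothetical caller, with `rl` a revlog, `cl` the
# changelog and `tr` an open transaction, could look like:
#
#     def linkmapper(linknode):
#         return cl.rev(linknode)
#
#     deltas = (
#         (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#         for (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#         in incoming_entries  # e.g. produced by a changegroup reader
#     )
#     added = rl.addgroup(deltas, linkmapper, tr)
#     # addgroup() returns False only when the incoming group was empty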
2784 def iscensored(self, rev):
2784 def iscensored(self, rev):
2785 """Check if a file revision is censored."""
2785 """Check if a file revision is censored."""
2786 if not self._censorable:
2786 if not self._censorable:
2787 return False
2787 return False
2788
2788
2789 return self.flags(rev) & REVIDX_ISCENSORED
2789 return self.flags(rev) & REVIDX_ISCENSORED
2790
2790
2791 def _peek_iscensored(self, baserev, delta):
2791 def _peek_iscensored(self, baserev, delta):
2792 """Quickly check if a delta produces a censored revision."""
2792 """Quickly check if a delta produces a censored revision."""
2793 if not self._censorable:
2793 if not self._censorable:
2794 return False
2794 return False
2795
2795
2796 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2796 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2797
2797
2798 def getstrippoint(self, minlink):
2798 def getstrippoint(self, minlink):
2799 """find the minimum rev that must be stripped to strip the linkrev
2799 """find the minimum rev that must be stripped to strip the linkrev
2800
2800
2801 Returns a tuple containing the minimum rev and a set of all revs that
2801 Returns a tuple containing the minimum rev and a set of all revs that
2802 have linkrevs that will be broken by this strip.
2802 have linkrevs that will be broken by this strip.
2803 """
2803 """
2804 return storageutil.resolvestripinfo(
2804 return storageutil.resolvestripinfo(
2805 minlink,
2805 minlink,
2806 len(self) - 1,
2806 len(self) - 1,
2807 self.headrevs(),
2807 self.headrevs(),
2808 self.linkrev,
2808 self.linkrev,
2809 self.parentrevs,
2809 self.parentrevs,
2810 )
2810 )
2811
2811
2812 def strip(self, minlink, transaction):
2812 def strip(self, minlink, transaction):
2813 """truncate the revlog on the first revision with a linkrev >= minlink
2813 """truncate the revlog on the first revision with a linkrev >= minlink
2814
2814
2815 This function is called when we're stripping revision minlink and
2815 This function is called when we're stripping revision minlink and
2816 its descendants from the repository.
2816 its descendants from the repository.
2817
2817
2818 We have to remove all revisions with linkrev >= minlink, because
2818 We have to remove all revisions with linkrev >= minlink, because
2819 the equivalent changelog revisions will be renumbered after the
2819 the equivalent changelog revisions will be renumbered after the
2820 strip.
2820 strip.
2821
2821
2822 So we truncate the revlog on the first of these revisions, and
2822 So we truncate the revlog on the first of these revisions, and
2823 trust that the caller has saved the revisions that shouldn't be
2823 trust that the caller has saved the revisions that shouldn't be
2824 removed and that it'll re-add them after this truncation.
2824 removed and that it'll re-add them after this truncation.
2825 """
2825 """
2826 if len(self) == 0:
2826 if len(self) == 0:
2827 return
2827 return
2828
2828
2829 rev, _ = self.getstrippoint(minlink)
2829 rev, _ = self.getstrippoint(minlink)
2830 if rev == len(self):
2830 if rev == len(self):
2831 return
2831 return
2832
2832
2833 # first truncate the files on disk
2833 # first truncate the files on disk
2834 data_end = self.start(rev)
2834 data_end = self.start(rev)
2835 if not self._inline:
2835 if not self._inline:
2836 transaction.add(self._datafile, data_end)
2836 transaction.add(self._datafile, data_end)
2837 end = rev * self.index.entry_size
2837 end = rev * self.index.entry_size
2838 else:
2838 else:
2839 end = data_end + (rev * self.index.entry_size)
2839 end = data_end + (rev * self.index.entry_size)
2840
2840
2841 transaction.add(self._indexfile, end)
2841 transaction.add(self._indexfile, end)
2842 if self._docket is not None:
2842 if self._docket is not None:
2843 # XXX we could leverage the docket while stripping. However it is
2843 # XXX we could leverage the docket while stripping. However it is
2844 # not powerful enough at the time of this comment
2844 # not powerful enough at the time of this comment
2845 self._docket.index_end = end
2845 self._docket.index_end = end
2846 self._docket.data_end = data_end
2846 self._docket.data_end = data_end
2847 self._docket.write(transaction, stripping=True)
2847 self._docket.write(transaction, stripping=True)
2848
2848
2849 # then reset internal state in memory to forget those revisions
2849 # then reset internal state in memory to forget those revisions
2850 self._revisioncache = None
2850 self._revisioncache = None
2851 self._chaininfocache = util.lrucachedict(500)
2851 self._chaininfocache = util.lrucachedict(500)
2852 self._chunkclear()
2852 self._chunkclear()
2853
2853
2854 del self.index[rev:-1]
2854 del self.index[rev:-1]
2855
2855
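# Illustrative sketch only (not Mercurial code): the expected calling pattern
# for the two methods above, with `rl` a revlog, `tr` an open transaction and
# `minlink` the first changelog revision being stripped:
#
#     striprev, brokenrevs = rl.getstrippoint(minlink)
#     # `brokenrevs` have linkrevs the strip will invalidate; callers are
#     # expected to save their data first, then truncate:
#     rl.strip(minlink, tr)
#     # ...and re-add the saved revisions afterwards.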
2856 def checksize(self):
2856 def checksize(self):
2857 """Check size of index and data files
2857 """Check size of index and data files
2858
2858
2859 return a (dd, di) tuple.
2859 return a (dd, di) tuple.
2860 - dd: extra bytes for the "data" file
2860 - dd: extra bytes for the "data" file
2861 - di: extra bytes for the "index" file
2861 - di: extra bytes for the "index" file
2862
2862
2863 A healthy revlog will return (0, 0).
2863 A healthy revlog will return (0, 0).
2864 """
2864 """
2865 expected = 0
2865 expected = 0
2866 if len(self):
2866 if len(self):
2867 expected = max(0, self.end(len(self) - 1))
2867 expected = max(0, self.end(len(self) - 1))
2868
2868
2869 try:
2869 try:
2870 with self._datafp() as f:
2870 with self._datafp() as f:
2871 f.seek(0, io.SEEK_END)
2871 f.seek(0, io.SEEK_END)
2872 actual = f.tell()
2872 actual = f.tell()
2873 dd = actual - expected
2873 dd = actual - expected
2874 except IOError as inst:
2874 except IOError as inst:
2875 if inst.errno != errno.ENOENT:
2875 if inst.errno != errno.ENOENT:
2876 raise
2876 raise
2877 dd = 0
2877 dd = 0
2878
2878
2879 try:
2879 try:
2880 f = self.opener(self._indexfile)
2880 f = self.opener(self._indexfile)
2881 f.seek(0, io.SEEK_END)
2881 f.seek(0, io.SEEK_END)
2882 actual = f.tell()
2882 actual = f.tell()
2883 f.close()
2883 f.close()
2884 s = self.index.entry_size
2884 s = self.index.entry_size
2885 i = max(0, actual // s)
2885 i = max(0, actual // s)
2886 di = actual - (i * s)
2886 di = actual - (i * s)
2887 if self._inline:
2887 if self._inline:
2888 databytes = 0
2888 databytes = 0
2889 for r in self:
2889 for r in self:
2890 databytes += max(0, self.length(r))
2890 databytes += max(0, self.length(r))
2891 dd = 0
2891 dd = 0
2892 di = actual - len(self) * s - databytes
2892 di = actual - len(self) * s - databytes
2893 except IOError as inst:
2893 except IOError as inst:
2894 if inst.errno != errno.ENOENT:
2894 if inst.errno != errno.ENOENT:
2895 raise
2895 raise
2896 di = 0
2896 di = 0
2897
2897
2898 return (dd, di)
2898 return (dd, di)
2899
2899
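# Illustrative arithmetic only (mirrors the inline branch above, not new
# Mercurial code): for an inline revlog the index file holds both the index
# entries and the revision data, so the extra bytes reported are
#
#     di = index_file_size - num_revs * entry_size - total_data_bytes
#
# and a healthy revlog yields dd == di == 0.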
2900 def files(self):
2900 def files(self):
2901 res = [self._indexfile]
2901 res = [self._indexfile]
2902 if not self._inline:
2902 if not self._inline:
2903 res.append(self._datafile)
2903 res.append(self._datafile)
2904 return res
2904 return res
2905
2905
2906 def emitrevisions(
2906 def emitrevisions(
2907 self,
2907 self,
2908 nodes,
2908 nodes,
2909 nodesorder=None,
2909 nodesorder=None,
2910 revisiondata=False,
2910 revisiondata=False,
2911 assumehaveparentrevisions=False,
2911 assumehaveparentrevisions=False,
2912 deltamode=repository.CG_DELTAMODE_STD,
2912 deltamode=repository.CG_DELTAMODE_STD,
2913 sidedata_helpers=None,
2913 sidedata_helpers=None,
2914 ):
2914 ):
2915 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2915 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2916 raise error.ProgrammingError(
2916 raise error.ProgrammingError(
2917 b'unhandled value for nodesorder: %s' % nodesorder
2917 b'unhandled value for nodesorder: %s' % nodesorder
2918 )
2918 )
2919
2919
2920 if nodesorder is None and not self._generaldelta:
2920 if nodesorder is None and not self._generaldelta:
2921 nodesorder = b'storage'
2921 nodesorder = b'storage'
2922
2922
2923 if (
2923 if (
2924 not self._storedeltachains
2924 not self._storedeltachains
2925 and deltamode != repository.CG_DELTAMODE_PREV
2925 and deltamode != repository.CG_DELTAMODE_PREV
2926 ):
2926 ):
2927 deltamode = repository.CG_DELTAMODE_FULL
2927 deltamode = repository.CG_DELTAMODE_FULL
2928
2928
2929 return storageutil.emitrevisions(
2929 return storageutil.emitrevisions(
2930 self,
2930 self,
2931 nodes,
2931 nodes,
2932 nodesorder,
2932 nodesorder,
2933 revlogrevisiondelta,
2933 revlogrevisiondelta,
2934 deltaparentfn=self.deltaparent,
2934 deltaparentfn=self.deltaparent,
2935 candeltafn=self.candelta,
2935 candeltafn=self.candelta,
2936 rawsizefn=self.rawsize,
2936 rawsizefn=self.rawsize,
2937 revdifffn=self.revdiff,
2937 revdifffn=self.revdiff,
2938 flagsfn=self.flags,
2938 flagsfn=self.flags,
2939 deltamode=deltamode,
2939 deltamode=deltamode,
2940 revisiondata=revisiondata,
2940 revisiondata=revisiondata,
2941 assumehaveparentrevisions=assumehaveparentrevisions,
2941 assumehaveparentrevisions=assumehaveparentrevisions,
2942 sidedata_helpers=sidedata_helpers,
2942 sidedata_helpers=sidedata_helpers,
2943 )
2943 )
2944
2944
2945 DELTAREUSEALWAYS = b'always'
2945 DELTAREUSEALWAYS = b'always'
2946 DELTAREUSESAMEREVS = b'samerevs'
2946 DELTAREUSESAMEREVS = b'samerevs'
2947 DELTAREUSENEVER = b'never'
2947 DELTAREUSENEVER = b'never'
2948
2948
2949 DELTAREUSEFULLADD = b'fulladd'
2949 DELTAREUSEFULLADD = b'fulladd'
2950
2950
2951 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2951 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2952
2952
2953 def clone(
2953 def clone(
2954 self,
2954 self,
2955 tr,
2955 tr,
2956 destrevlog,
2956 destrevlog,
2957 addrevisioncb=None,
2957 addrevisioncb=None,
2958 deltareuse=DELTAREUSESAMEREVS,
2958 deltareuse=DELTAREUSESAMEREVS,
2959 forcedeltabothparents=None,
2959 forcedeltabothparents=None,
2960 sidedata_helpers=None,
2960 sidedata_helpers=None,
2961 ):
2961 ):
2962 """Copy this revlog to another, possibly with format changes.
2962 """Copy this revlog to another, possibly with format changes.
2963
2963
2964 The destination revlog will contain the same revisions and nodes.
2964 The destination revlog will contain the same revisions and nodes.
2965 However, it may not be bit-for-bit identical due to e.g. delta encoding
2965 However, it may not be bit-for-bit identical due to e.g. delta encoding
2966 differences.
2966 differences.
2967
2967
2968 The ``deltareuse`` argument controls how deltas from the existing revlog
2968 The ``deltareuse`` argument controls how deltas from the existing revlog
2969 are preserved in the destination revlog. The argument can have the
2969 are preserved in the destination revlog. The argument can have the
2970 following values:
2970 following values:
2971
2971
2972 DELTAREUSEALWAYS
2972 DELTAREUSEALWAYS
2973 Deltas will always be reused (if possible), even if the destination
2973 Deltas will always be reused (if possible), even if the destination
2974 revlog would not select the same revisions for the delta. This is the
2974 revlog would not select the same revisions for the delta. This is the
2975 fastest mode of operation.
2975 fastest mode of operation.
2976 DELTAREUSESAMEREVS
2976 DELTAREUSESAMEREVS
2977 Deltas will be reused if the destination revlog would pick the same
2977 Deltas will be reused if the destination revlog would pick the same
2978 revisions for the delta. This mode strikes a balance between speed
2978 revisions for the delta. This mode strikes a balance between speed
2979 and optimization.
2979 and optimization.
2980 DELTAREUSENEVER
2980 DELTAREUSENEVER
2981 Deltas will never be reused. This is the slowest mode of execution.
2981 Deltas will never be reused. This is the slowest mode of execution.
2982 This mode can be used to recompute deltas (e.g. if the diff/delta
2982 This mode can be used to recompute deltas (e.g. if the diff/delta
2983 algorithm changes).
2983 algorithm changes).
2984 DELTAREUSEFULLADD
2984 DELTAREUSEFULLADD
2985 Revisions will be re-added as if they were new content. This is
2985 Revisions will be re-added as if they were new content. This is
2986 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2986 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2987 e.g. large file detection and handling.
2987 e.g. large file detection and handling.
2988
2988
2989 Delta computation can be slow, so the choice of delta reuse policy can
2989 Delta computation can be slow, so the choice of delta reuse policy can
2990 significantly affect run time.
2990 significantly affect run time.
2991
2991
2992 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2992 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2993 two extremes. Deltas will be reused if they are appropriate. But if the
2993 two extremes. Deltas will be reused if they are appropriate. But if the
2994 delta could choose a better revision, it will do so. This means if you
2994 delta could choose a better revision, it will do so. This means if you
2995 are converting a non-generaldelta revlog to a generaldelta revlog,
2995 are converting a non-generaldelta revlog to a generaldelta revlog,
2996 deltas will be recomputed if the delta's parent isn't a parent of the
2996 deltas will be recomputed if the delta's parent isn't a parent of the
2997 revision.
2997 revision.
2998
2998
2999 In addition to the delta policy, the ``forcedeltabothparents``
2999 In addition to the delta policy, the ``forcedeltabothparents``
3000 argument controls whether deltas are forcibly computed against both parents
3000 argument controls whether deltas are forcibly computed against both parents
3001 for merges. If unset, the destination revlog's current setting is used.
3001 for merges. If unset, the destination revlog's current setting is used.
3002
3002
3003 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3003 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3004 `sidedata_helpers`.
3004 `sidedata_helpers`.
3005 """
3005 """
3006 if deltareuse not in self.DELTAREUSEALL:
3006 if deltareuse not in self.DELTAREUSEALL:
3007 raise ValueError(
3007 raise ValueError(
3008 _(b'value for deltareuse invalid: %s') % deltareuse
3008 _(b'value for deltareuse invalid: %s') % deltareuse
3009 )
3009 )
3010
3010
3011 if len(destrevlog):
3011 if len(destrevlog):
3012 raise ValueError(_(b'destination revlog is not empty'))
3012 raise ValueError(_(b'destination revlog is not empty'))
3013
3013
3014 if getattr(self, 'filteredrevs', None):
3014 if getattr(self, 'filteredrevs', None):
3015 raise ValueError(_(b'source revlog has filtered revisions'))
3015 raise ValueError(_(b'source revlog has filtered revisions'))
3016 if getattr(destrevlog, 'filteredrevs', None):
3016 if getattr(destrevlog, 'filteredrevs', None):
3017 raise ValueError(_(b'destination revlog has filtered revisions'))
3017 raise ValueError(_(b'destination revlog has filtered revisions'))
3018
3018
3019 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3019 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3020 # if possible.
3020 # if possible.
3021 oldlazydelta = destrevlog._lazydelta
3021 oldlazydelta = destrevlog._lazydelta
3022 oldlazydeltabase = destrevlog._lazydeltabase
3022 oldlazydeltabase = destrevlog._lazydeltabase
3023 oldamd = destrevlog._deltabothparents
3023 oldamd = destrevlog._deltabothparents
3024
3024
3025 try:
3025 try:
3026 if deltareuse == self.DELTAREUSEALWAYS:
3026 if deltareuse == self.DELTAREUSEALWAYS:
3027 destrevlog._lazydeltabase = True
3027 destrevlog._lazydeltabase = True
3028 destrevlog._lazydelta = True
3028 destrevlog._lazydelta = True
3029 elif deltareuse == self.DELTAREUSESAMEREVS:
3029 elif deltareuse == self.DELTAREUSESAMEREVS:
3030 destrevlog._lazydeltabase = False
3030 destrevlog._lazydeltabase = False
3031 destrevlog._lazydelta = True
3031 destrevlog._lazydelta = True
3032 elif deltareuse == self.DELTAREUSENEVER:
3032 elif deltareuse == self.DELTAREUSENEVER:
3033 destrevlog._lazydeltabase = False
3033 destrevlog._lazydeltabase = False
3034 destrevlog._lazydelta = False
3034 destrevlog._lazydelta = False
3035
3035
3036 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3036 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3037
3037
3038 self._clone(
3038 self._clone(
3039 tr,
3039 tr,
3040 destrevlog,
3040 destrevlog,
3041 addrevisioncb,
3041 addrevisioncb,
3042 deltareuse,
3042 deltareuse,
3043 forcedeltabothparents,
3043 forcedeltabothparents,
3044 sidedata_helpers,
3044 sidedata_helpers,
3045 )
3045 )
3046
3046
3047 finally:
3047 finally:
3048 destrevlog._lazydelta = oldlazydelta
3048 destrevlog._lazydelta = oldlazydelta
3049 destrevlog._lazydeltabase = oldlazydeltabase
3049 destrevlog._lazydeltabase = oldlazydeltabase
3050 destrevlog._deltabothparents = oldamd
3050 destrevlog._deltabothparents = oldamd
3051
3051
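# Illustrative sketch only (not Mercurial code): a hypothetical caller
# re-encoding a revlog into an empty destination while recomputing every
# delta, with `src` and `dest` revlog instances and `tr` an open transaction:
#
#     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
#
# DELTAREUSEALWAYS would instead copy existing deltas whenever possible
# (fastest), while the default DELTAREUSESAMEREVS only reuses a delta when
# the destination would have picked the same base revision.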
3052 def _clone(
3052 def _clone(
3053 self,
3053 self,
3054 tr,
3054 tr,
3055 destrevlog,
3055 destrevlog,
3056 addrevisioncb,
3056 addrevisioncb,
3057 deltareuse,
3057 deltareuse,
3058 forcedeltabothparents,
3058 forcedeltabothparents,
3059 sidedata_helpers,
3059 sidedata_helpers,
3060 ):
3060 ):
3061 """perform the core duty of `revlog.clone` after parameter processing"""
3061 """perform the core duty of `revlog.clone` after parameter processing"""
3062 deltacomputer = deltautil.deltacomputer(destrevlog)
3062 deltacomputer = deltautil.deltacomputer(destrevlog)
3063 index = self.index
3063 index = self.index
3064 for rev in self:
3064 for rev in self:
3065 entry = index[rev]
3065 entry = index[rev]
3066
3066
3067 # Some classes override linkrev to take filtered revs into
3067 # Some classes override linkrev to take filtered revs into
3068 # account. Use raw entry from index.
3068 # account. Use raw entry from index.
3069 flags = entry[0] & 0xFFFF
3069 flags = entry[0] & 0xFFFF
3070 linkrev = entry[4]
3070 linkrev = entry[4]
3071 p1 = index[entry[5]][7]
3071 p1 = index[entry[5]][7]
3072 p2 = index[entry[6]][7]
3072 p2 = index[entry[6]][7]
3073 node = entry[7]
3073 node = entry[7]
3074
3074
3075 # (Possibly) reuse the delta from the revlog if allowed and
3075 # (Possibly) reuse the delta from the revlog if allowed and
3076 # the revlog chunk is a delta.
3076 # the revlog chunk is a delta.
3077 cachedelta = None
3077 cachedelta = None
3078 rawtext = None
3078 rawtext = None
3079 if deltareuse == self.DELTAREUSEFULLADD:
3079 if deltareuse == self.DELTAREUSEFULLADD:
3080 text, sidedata = self._revisiondata(rev)
3080 text, sidedata = self._revisiondata(rev)
3081
3081
3082 if sidedata_helpers is not None:
3082 if sidedata_helpers is not None:
3083 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3083 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3084 self, sidedata_helpers, sidedata, rev
3084 self, sidedata_helpers, sidedata, rev
3085 )
3085 )
3086 flags = flags | new_flags[0] & ~new_flags[1]
3086 flags = flags | new_flags[0] & ~new_flags[1]
3087
3087
3088 destrevlog.addrevision(
3088 destrevlog.addrevision(
3089 text,
3089 text,
3090 tr,
3090 tr,
3091 linkrev,
3091 linkrev,
3092 p1,
3092 p1,
3093 p2,
3093 p2,
3094 cachedelta=cachedelta,
3094 cachedelta=cachedelta,
3095 node=node,
3095 node=node,
3096 flags=flags,
3096 flags=flags,
3097 deltacomputer=deltacomputer,
3097 deltacomputer=deltacomputer,
3098 sidedata=sidedata,
3098 sidedata=sidedata,
3099 )
3099 )
3100 else:
3100 else:
3101 if destrevlog._lazydelta:
3101 if destrevlog._lazydelta:
3102 dp = self.deltaparent(rev)
3102 dp = self.deltaparent(rev)
3103 if dp != nullrev:
3103 if dp != nullrev:
3104 cachedelta = (dp, bytes(self._chunk(rev)))
3104 cachedelta = (dp, bytes(self._chunk(rev)))
3105
3105
3106 sidedata = None
3106 sidedata = None
3107 if not cachedelta:
3107 if not cachedelta:
3108 rawtext, sidedata = self._revisiondata(rev)
3108 rawtext, sidedata = self._revisiondata(rev)
3109 if sidedata is None:
3109 if sidedata is None:
3110 sidedata = self.sidedata(rev)
3110 sidedata = self.sidedata(rev)
3111
3111
3112 if sidedata_helpers is not None:
3112 if sidedata_helpers is not None:
3113 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3113 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3114 self, sidedata_helpers, sidedata, rev
3114 self, sidedata_helpers, sidedata, rev
3115 )
3115 )
3116 flags = flags | new_flags[0] & ~new_flags[1]
3116 flags = flags | new_flags[0] & ~new_flags[1]
3117
3117
3118 with destrevlog._writing(tr):
3118 with destrevlog._writing(tr):
3119 destrevlog._addrevision(
3119 destrevlog._addrevision(
3120 node,
3120 node,
3121 rawtext,
3121 rawtext,
3122 tr,
3122 tr,
3123 linkrev,
3123 linkrev,
3124 p1,
3124 p1,
3125 p2,
3125 p2,
3126 flags,
3126 flags,
3127 cachedelta,
3127 cachedelta,
3128 deltacomputer=deltacomputer,
3128 deltacomputer=deltacomputer,
3129 sidedata=sidedata,
3129 sidedata=sidedata,
3130 )
3130 )
3131
3131
3132 if addrevisioncb:
3132 if addrevisioncb:
3133 addrevisioncb(self, rev, node)
3133 addrevisioncb(self, rev, node)
3134
3134
3135 def censorrevision(self, tr, censornode, tombstone=b''):
3135 def censorrevision(self, tr, censornode, tombstone=b''):
3136 if self._format_version == REVLOGV0:
3136 if self._format_version == REVLOGV0:
3137 raise error.RevlogError(
3137 raise error.RevlogError(
3138 _(b'cannot censor with version %d revlogs')
3138 _(b'cannot censor with version %d revlogs')
3139 % self._format_version
3139 % self._format_version
3140 )
3140 )
3141
3141
3142 censorrev = self.rev(censornode)
3142 censorrev = self.rev(censornode)
3143 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3143 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3144
3144
3145 if len(tombstone) > self.rawsize(censorrev):
3145 if len(tombstone) > self.rawsize(censorrev):
3146 raise error.Abort(
3146 raise error.Abort(
3147 _(b'censor tombstone must be no longer than censored data')
3147 _(b'censor tombstone must be no longer than censored data')
3148 )
3148 )
3149
3149
3150 # Rewriting the revlog in place is hard. Our strategy for censoring is
3150 # Rewriting the revlog in place is hard. Our strategy for censoring is
3151 # to create a new revlog, copy all revisions to it, then replace the
3151 # to create a new revlog, copy all revisions to it, then replace the
3152 # revlogs on transaction close.
3152 # revlogs on transaction close.
3153 #
3153 #
3154 # This is a bit dangerous. We could easily have a mismatch of state.
3154 # This is a bit dangerous. We could easily have a mismatch of state.
3155 newrl = revlog(
3155 newrl = revlog(
3156 self.opener,
3156 self.opener,
3157 target=self.target,
3157 target=self.target,
3158 radix=self.radix,
3158 radix=self.radix,
3159 postfix=b'tmpcensored',
3159 postfix=b'tmpcensored',
3160 censorable=True,
3160 censorable=True,
3161 )
3161 )
3162 newrl._format_version = self._format_version
3162 newrl._format_version = self._format_version
3163 newrl._format_flags = self._format_flags
3163 newrl._format_flags = self._format_flags
3164 newrl._generaldelta = self._generaldelta
3164 newrl._generaldelta = self._generaldelta
3165 newrl._parse_index = self._parse_index
3165 newrl._parse_index = self._parse_index
3166
3166
3167 for rev in self.revs():
3167 for rev in self.revs():
3168 node = self.node(rev)
3168 node = self.node(rev)
3169 p1, p2 = self.parents(node)
3169 p1, p2 = self.parents(node)
3170
3170
3171 if rev == censorrev:
3171 if rev == censorrev:
3172 newrl.addrawrevision(
3172 newrl.addrawrevision(
3173 tombstone,
3173 tombstone,
3174 tr,
3174 tr,
3175 self.linkrev(censorrev),
3175 self.linkrev(censorrev),
3176 p1,
3176 p1,
3177 p2,
3177 p2,
3178 censornode,
3178 censornode,
3179 REVIDX_ISCENSORED,
3179 REVIDX_ISCENSORED,
3180 )
3180 )
3181
3181
3182 if newrl.deltaparent(rev) != nullrev:
3182 if newrl.deltaparent(rev) != nullrev:
3183 raise error.Abort(
3183 raise error.Abort(
3184 _(
3184 _(
3185 b'censored revision stored as delta; '
3185 b'censored revision stored as delta; '
3186 b'cannot censor'
3186 b'cannot censor'
3187 ),
3187 ),
3188 hint=_(
3188 hint=_(
3189 b'censoring of revlogs is not '
3189 b'censoring of revlogs is not '
3190 b'fully implemented; please report '
3190 b'fully implemented; please report '
3191 b'this bug'
3191 b'this bug'
3192 ),
3192 ),
3193 )
3193 )
3194 continue
3194 continue
3195
3195
3196 if self.iscensored(rev):
3196 if self.iscensored(rev):
3197 if self.deltaparent(rev) != nullrev:
3197 if self.deltaparent(rev) != nullrev:
3198 raise error.Abort(
3198 raise error.Abort(
3199 _(
3199 _(
3200 b'cannot censor due to censored '
3200 b'cannot censor due to censored '
3201 b'revision having delta stored'
3201 b'revision having delta stored'
3202 )
3202 )
3203 )
3203 )
3204 rawtext = self._chunk(rev)
3204 rawtext = self._chunk(rev)
3205 else:
3205 else:
3206 rawtext = self.rawdata(rev)
3206 rawtext = self.rawdata(rev)
3207
3207
3208 newrl.addrawrevision(
3208 newrl.addrawrevision(
3209 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3209 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3210 )
3210 )
3211
3211
3212 tr.addbackup(self._indexfile, location=b'store')
3212 tr.addbackup(self._indexfile, location=b'store')
3213 if not self._inline:
3213 if not self._inline:
3214 tr.addbackup(self._datafile, location=b'store')
3214 tr.addbackup(self._datafile, location=b'store')
3215
3215
3216 self.opener.rename(newrl._indexfile, self._indexfile)
3216 self.opener.rename(newrl._indexfile, self._indexfile)
3217 if not self._inline:
3217 if not self._inline:
3218 self.opener.rename(newrl._datafile, self._datafile)
3218 self.opener.rename(newrl._datafile, self._datafile)
3219
3219
3220 self.clearcaches()
3220 self.clearcaches()
3221 self._loadindex()
3221 self._loadindex()
3222
3222
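# Illustrative sketch only (not Mercurial code): censoring replaces one
# revision's raw data with a tombstone by rewriting the whole revlog, as
# described above.  A hypothetical call, with `fl` a censorable revlog,
# `tr` an open transaction and `badnode` the node to censor:
#
#     fl.censorrevision(tr, badnode, tombstone=b'removed by administrator')
#     # aborts if the tombstone is longer than the data it replaces, or if
#     # the rewritten revision would end up stored as a delta.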
3223 def verifyintegrity(self, state):
3223 def verifyintegrity(self, state):
3224 """Verifies the integrity of the revlog.
3224 """Verifies the integrity of the revlog.
3225
3225
3226 Yields ``revlogproblem`` instances describing problems that are
3226 Yields ``revlogproblem`` instances describing problems that are
3227 found.
3227 found.
3228 """
3228 """
3229 dd, di = self.checksize()
3229 dd, di = self.checksize()
3230 if dd:
3230 if dd:
3231 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3231 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3232 if di:
3232 if di:
3233 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3233 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3234
3234
3235 version = self._format_version
3235 version = self._format_version
3236
3236
3237 # The verifier tells us what version revlog we should be.
3237 # The verifier tells us what version revlog we should be.
3238 if version != state[b'expectedversion']:
3238 if version != state[b'expectedversion']:
3239 yield revlogproblem(
3239 yield revlogproblem(
3240 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3240 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3241 % (self.display_id, version, state[b'expectedversion'])
3241 % (self.display_id, version, state[b'expectedversion'])
3242 )
3242 )
3243
3243
3244 state[b'skipread'] = set()
3244 state[b'skipread'] = set()
3245 state[b'safe_renamed'] = set()
3245 state[b'safe_renamed'] = set()
3246
3246
3247 for rev in self:
3247 for rev in self:
3248 node = self.node(rev)
3248 node = self.node(rev)
3249
3249
3250 # Verify contents. 4 cases to care about:
3250 # Verify contents. 4 cases to care about:
3251 #
3251 #
3252 # common: the most common case
3252 # common: the most common case
3253 # rename: with a rename
3253 # rename: with a rename
3254 # meta: file content starts with b'\1\n', the metadata
3254 # meta: file content starts with b'\1\n', the metadata
3255 # header defined in filelog.py, but without a rename
3255 # header defined in filelog.py, but without a rename
3256 # ext: content stored externally
3256 # ext: content stored externally
3257 #
3257 #
3258 # More formally, their differences are shown below:
3258 # More formally, their differences are shown below:
3259 #
3259 #
3260 # | common | rename | meta | ext
3260 # | common | rename | meta | ext
3261 # -------------------------------------------------------
3261 # -------------------------------------------------------
3262 # flags() | 0 | 0 | 0 | not 0
3262 # flags() | 0 | 0 | 0 | not 0
3263 # renamed() | False | True | False | ?
3263 # renamed() | False | True | False | ?
3264 # rawtext[0:2]=='\1\n'| False | True | True | ?
3264 # rawtext[0:2]=='\1\n'| False | True | True | ?
3265 #
3265 #
3266 # "rawtext" means the raw text stored in revlog data, which
3266 # "rawtext" means the raw text stored in revlog data, which
3267 # could be retrieved by "rawdata(rev)". "text"
3267 # could be retrieved by "rawdata(rev)". "text"
3268 # mentioned below is "revision(rev)".
3268 # mentioned below is "revision(rev)".
3269 #
3269 #
3270 # There are 3 different lengths stored physically:
3270 # There are 3 different lengths stored physically:
3271 # 1. L1: rawsize, stored in revlog index
3271 # 1. L1: rawsize, stored in revlog index
3272 # 2. L2: len(rawtext), stored in revlog data
3272 # 2. L2: len(rawtext), stored in revlog data
3273 # 3. L3: len(text), stored in revlog data if flags==0, or
3273 # 3. L3: len(text), stored in revlog data if flags==0, or
3274 # possibly somewhere else if flags!=0
3274 # possibly somewhere else if flags!=0
3275 #
3275 #
3276 # L1 should be equal to L2. L3 could be different from them.
3276 # L1 should be equal to L2. L3 could be different from them.
3277 # "text" may or may not affect commit hash depending on flag
3277 # "text" may or may not affect commit hash depending on flag
3278 # processors (see flagutil.addflagprocessor).
3278 # processors (see flagutil.addflagprocessor).
3279 #
3279 #
3280 # | common | rename | meta | ext
3280 # | common | rename | meta | ext
3281 # -------------------------------------------------
3281 # -------------------------------------------------
3282 # rawsize() | L1 | L1 | L1 | L1
3282 # rawsize() | L1 | L1 | L1 | L1
3283 # size() | L1 | L2-LM | L1(*) | L1 (?)
3283 # size() | L1 | L2-LM | L1(*) | L1 (?)
3284 # len(rawtext) | L2 | L2 | L2 | L2
3284 # len(rawtext) | L2 | L2 | L2 | L2
3285 # len(text) | L2 | L2 | L2 | L3
3285 # len(text) | L2 | L2 | L2 | L3
3286 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3286 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3287 #
3287 #
3288 # LM: length of metadata, depending on rawtext
3288 # LM: length of metadata, depending on rawtext
3289 # (*): not ideal, see comment in filelog.size
3289 # (*): not ideal, see comment in filelog.size
3290 # (?): could be "- len(meta)" if the resolved content has
3290 # (?): could be "- len(meta)" if the resolved content has
3291 # rename metadata
3291 # rename metadata
3292 #
3292 #
3293 # Checks needed to be done:
3293 # Checks needed to be done:
3294 # 1. length check: L1 == L2, in all cases.
3294 # 1. length check: L1 == L2, in all cases.
3295 # 2. hash check: depending on flag processor, we may need to
3295 # 2. hash check: depending on flag processor, we may need to
3296 # use either "text" (external), or "rawtext" (in revlog).
3296 # use either "text" (external), or "rawtext" (in revlog).
3297
3297
3298 try:
3298 try:
3299 skipflags = state.get(b'skipflags', 0)
3299 skipflags = state.get(b'skipflags', 0)
3300 if skipflags:
3300 if skipflags:
3301 skipflags &= self.flags(rev)
3301 skipflags &= self.flags(rev)
3302
3302
3303 _verify_revision(self, skipflags, state, node)
3303 _verify_revision(self, skipflags, state, node)
3304
3304
3305 l1 = self.rawsize(rev)
3305 l1 = self.rawsize(rev)
3306 l2 = len(self.rawdata(node))
3306 l2 = len(self.rawdata(node))
3307
3307
3308 if l1 != l2:
3308 if l1 != l2:
3309 yield revlogproblem(
3309 yield revlogproblem(
3310 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3310 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3311 node=node,
3311 node=node,
3312 )
3312 )
3313
3313
3314 except error.CensoredNodeError:
3314 except error.CensoredNodeError:
3315 if state[b'erroroncensored']:
3315 if state[b'erroroncensored']:
3316 yield revlogproblem(
3316 yield revlogproblem(
3317 error=_(b'censored file data'), node=node
3317 error=_(b'censored file data'), node=node
3318 )
3318 )
3319 state[b'skipread'].add(node)
3319 state[b'skipread'].add(node)
3320 except Exception as e:
3320 except Exception as e:
3321 yield revlogproblem(
3321 yield revlogproblem(
3322 error=_(b'unpacking %s: %s')
3322 error=_(b'unpacking %s: %s')
3323 % (short(node), stringutil.forcebytestr(e)),
3323 % (short(node), stringutil.forcebytestr(e)),
3324 node=node,
3324 node=node,
3325 )
3325 )
3326 state[b'skipread'].add(node)
3326 state[b'skipread'].add(node)
3327
3327
3328 def storageinfo(
3328 def storageinfo(
3329 self,
3329 self,
3330 exclusivefiles=False,
3330 exclusivefiles=False,
3331 sharedfiles=False,
3331 sharedfiles=False,
3332 revisionscount=False,
3332 revisionscount=False,
3333 trackedsize=False,
3333 trackedsize=False,
3334 storedsize=False,
3334 storedsize=False,
3335 ):
3335 ):
3336 d = {}
3336 d = {}
3337
3337
3338 if exclusivefiles:
3338 if exclusivefiles:
3339 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3339 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3340 if not self._inline:
3340 if not self._inline:
3341 d[b'exclusivefiles'].append((self.opener, self._datafile))
3341 d[b'exclusivefiles'].append((self.opener, self._datafile))
3342
3342
3343 if sharedfiles:
3343 if sharedfiles:
3344 d[b'sharedfiles'] = []
3344 d[b'sharedfiles'] = []
3345
3345
3346 if revisionscount:
3346 if revisionscount:
3347 d[b'revisionscount'] = len(self)
3347 d[b'revisionscount'] = len(self)
3348
3348
3349 if trackedsize:
3349 if trackedsize:
3350 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3350 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3351
3351
3352 if storedsize:
3352 if storedsize:
3353 d[b'storedsize'] = sum(
3353 d[b'storedsize'] = sum(
3354 self.opener.stat(path).st_size for path in self.files()
3354 self.opener.stat(path).st_size for path in self.files()
3355 )
3355 )
3356
3356
3357 return d
3357 return d
3358
3358
3359 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3359 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3360 if not self.hassidedata:
3360 if not self.hassidedata:
3361 return
3361 return
3362 # revlog formats with sidedata support do not support inline
3362 # revlog formats with sidedata support do not support inline
3363 assert not self._inline
3363 assert not self._inline
3364 if not helpers[1] and not helpers[2]:
3364 if not helpers[1] and not helpers[2]:
3365 # Nothing to generate or remove
3365 # Nothing to generate or remove
3366 return
3366 return
3367
3367
3368 new_entries = []
3368 new_entries = []
3369 # append the new sidedata
3369 # append the new sidedata
3370 with self._writing(transaction):
3370 with self._writing(transaction):
3371 ifh, dfh = self._writinghandles
3371 ifh, dfh = self._writinghandles
3372 if self._docket is not None:
3372 if self._docket is not None:
3373 dfh.seek(self._docket.data_end, os.SEEK_SET)
3373 dfh.seek(self._docket.data_end, os.SEEK_SET)
3374 else:
3374 else:
3375 dfh.seek(0, os.SEEK_END)
3375 dfh.seek(0, os.SEEK_END)
3376
3376
3377 current_offset = dfh.tell()
3377 current_offset = dfh.tell()
3378 for rev in range(startrev, endrev + 1):
3378 for rev in range(startrev, endrev + 1):
3379 entry = self.index[rev]
3379 entry = self.index[rev]
3380 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3380 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3381 store=self,
3381 store=self,
3382 sidedata_helpers=helpers,
3382 sidedata_helpers=helpers,
3383 sidedata={},
3383 sidedata={},
3384 rev=rev,
3384 rev=rev,
3385 )
3385 )
3386
3386
3387 serialized_sidedata = sidedatautil.serialize_sidedata(
3387 serialized_sidedata = sidedatautil.serialize_sidedata(
3388 new_sidedata
3388 new_sidedata
3389 )
3389 )
3390
3390
3391 sidedata_compression_mode = COMP_MODE_INLINE
3391 sidedata_compression_mode = COMP_MODE_INLINE
3392 if serialized_sidedata and self.hassidedata:
3392 if serialized_sidedata and self.hassidedata:
3393 sidedata_compression_mode = COMP_MODE_PLAIN
3393 sidedata_compression_mode = COMP_MODE_PLAIN
3394 h, comp_sidedata = self.compress(serialized_sidedata)
3394 h, comp_sidedata = self.compress(serialized_sidedata)
3395 if (
3395 if (
3396 h != b'u'
3396 h != b'u'
3397 and comp_sidedata[0] != b'\0'
3397 and comp_sidedata[0] != b'\0'
3398 and len(comp_sidedata) < len(serialized_sidedata)
3398 and len(comp_sidedata) < len(serialized_sidedata)
3399 ):
3399 ):
3400 assert not h
3400 assert not h
3401 if (
3401 if (
3402 comp_sidedata[0]
3402 comp_sidedata[0]
3403 == self._docket.default_compression_header
3403 == self._docket.default_compression_header
3404 ):
3404 ):
3405 sidedata_compression_mode = COMP_MODE_DEFAULT
3405 sidedata_compression_mode = COMP_MODE_DEFAULT
3406 serialized_sidedata = comp_sidedata
3406 serialized_sidedata = comp_sidedata
3407 else:
3407 else:
3408 sidedata_compression_mode = COMP_MODE_INLINE
3408 sidedata_compression_mode = COMP_MODE_INLINE
3409 serialized_sidedata = comp_sidedata
3409 serialized_sidedata = comp_sidedata
3410 if entry[8] != 0 or entry[9] != 0:
3410 if entry[8] != 0 or entry[9] != 0:
3411 # rewriting entries that already have sidedata is not
3411 # rewriting entries that already have sidedata is not
3412 # supported yet, because it introduces garbage data in the
3412 # supported yet, because it introduces garbage data in the
3413 # revlog.
3413 # revlog.
3414 msg = b"rewriting existing sidedata is not supported yet"
3414 msg = b"rewriting existing sidedata is not supported yet"
3415 raise error.Abort(msg)
3415 raise error.Abort(msg)
3416
3416
3417 # Apply (potential) flags to add and to remove after running
3417 # Apply (potential) flags to add and to remove after running
3418 # the sidedata helpers
3418 # the sidedata helpers
3419 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3419 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3420 entry_update = (
3420 entry_update = (
3421 current_offset,
3421 current_offset,
3422 len(serialized_sidedata),
3422 len(serialized_sidedata),
3423 new_offset_flags,
3423 new_offset_flags,
3424 sidedata_compression_mode,
3424 sidedata_compression_mode,
3425 )
3425 )
3426
3426
3427 # the sidedata computation might have moved the file cursors around
3427 # the sidedata computation might have moved the file cursors around
3428 dfh.seek(current_offset, os.SEEK_SET)
3428 dfh.seek(current_offset, os.SEEK_SET)
3429 dfh.write(serialized_sidedata)
3429 dfh.write(serialized_sidedata)
3430 new_entries.append(entry_update)
3430 new_entries.append(entry_update)
3431 current_offset += len(serialized_sidedata)
3431 current_offset += len(serialized_sidedata)
3432 if self._docket is not None:
3432 if self._docket is not None:
3433 self._docket.data_end = dfh.tell()
3433 self._docket.data_end = dfh.tell()
3434
3434
3435 # rewrite the new index entries
3435 # rewrite the new index entries
3436 ifh.seek(startrev * self.index.entry_size)
3436 ifh.seek(startrev * self.index.entry_size)
3437 for i, e in enumerate(new_entries):
3437 for i, e in enumerate(new_entries):
3438 rev = startrev + i
3438 rev = startrev + i
3439 self.index.replace_sidedata_info(rev, *e)
3439 self.index.replace_sidedata_info(rev, *e)
3440 packed = self.index.entry_binary(rev)
3440 packed = self.index.entry_binary(rev)
3441 if rev == 0 and self._docket is None:
3441 if rev == 0 and self._docket is None:
3442 header = self._format_flags | self._format_version
3442 header = self._format_flags | self._format_version
3443 header = self.index.pack_header(header)
3443 header = self.index.pack_header(header)
3444 packed = header + packed
3444 packed = header + packed
3445 ifh.write(packed)
3445 ifh.write(packed)
@@ -1,516 +1,520 b''
1 # setdiscovery.py - improved discovery of common nodeset for mercurial
1 # setdiscovery.py - improved discovery of common nodeset for mercurial
2 #
2 #
3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
3 # Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
4 # and Peter Arrenbrecht <peter@arrenbrecht.ch>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """
8 """
9 The algorithm works in the following way. You have two repositories: local and
9 The algorithm works in the following way. You have two repositories: local and
10 remote. They both contain a DAG of changelists.
10 remote. They both contain a DAG of changelists.
11
11
12 The goal of the discovery protocol is to find one set of node *common*,
12 The goal of the discovery protocol is to find one set of node *common*,
13 the set of nodes shared by local and remote.
13 the set of nodes shared by local and remote.
14
14
15 One of the issues with the original protocol was latency: it could
15 One of the issues with the original protocol was latency: it could
16 potentially require lots of roundtrips to discover that the local repo was a
16 potentially require lots of roundtrips to discover that the local repo was a
17 subset of remote (which is a very common case, you usually have few changes
17 subset of remote (which is a very common case, you usually have few changes
18 compared to upstream, while upstream probably had lots of development).
18 compared to upstream, while upstream probably had lots of development).
19
19
20 The new protocol only requires one interface for the remote repo: `known()`,
20 The new protocol only requires one interface for the remote repo: `known()`,
21 which given a set of changelists tells you if they are present in the DAG.
21 which given a set of changelists tells you if they are present in the DAG.
22
22
23 The algorithm then works as follows:
23 The algorithm then works as follows:
24
24
25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
25 - We will be using three sets, `common`, `missing`, `unknown`. Originally
26 all nodes are in `unknown`.
26 all nodes are in `unknown`.
27 - Take a sample from `unknown`, call `remote.known(sample)`
27 - Take a sample from `unknown`, call `remote.known(sample)`
28 - For each node that remote knows, move it and all its ancestors to `common`
28 - For each node that remote knows, move it and all its ancestors to `common`
29 - For each node that remote doesn't know, move it and all its descendants
29 - For each node that remote doesn't know, move it and all its descendants
30 to `missing`
30 to `missing`
31 - Iterate until `unknown` is empty
31 - Iterate until `unknown` is empty
32
32
33 There are a couple of optimizations. The first is that instead of starting with a
33 There are a couple of optimizations. The first is that instead of starting with a
34 random sample of missing, we start by sending all heads; in the case where the
34 random sample of missing, we start by sending all heads; in the case where the
35 local repo is a subset, you have computed the answer in one round trip.
35 local repo is a subset, you have computed the answer in one round trip.
36
36
37 Then you can do something similar to the bisecting strategy used when
37 Then you can do something similar to the bisecting strategy used when
38 finding faulty changesets. Instead of random samples, you can try picking
38 finding faulty changesets. Instead of random samples, you can try picking
39 nodes that will maximize the number of nodes that will be
39 nodes that will maximize the number of nodes that will be
40 classified with them (since all ancestors or descendants will be marked as well).
40 classified with them (since all ancestors or descendants will be marked as well).
41 """
41 """
42
42
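# A minimal, self-contained model of the loop described in the docstring
# above.  Illustrative only: it uses a toy DAG (a dict mapping each rev to
# the list of its parent revs) and a fake remote_known() callable instead of
# real Mercurial objects, and it is not meant to live in this module.
def _toy_discovery(parents, heads, remote_known, samplesize=2):
    """Partition a toy DAG into (common, missing) using only remote_known()."""
    import random

    # reverse the parent edges once so descendants can be walked too
    children = {r: [] for r in parents}
    for r, ps in parents.items():
        for p in ps:
            children[p].append(r)

    def closure(rev, edges):
        # all revs reachable from `rev` through `edges`, including `rev`
        seen, stack = set(), [rev]
        while stack:
            r = stack.pop()
            if r not in seen:
                seen.add(r)
                stack.extend(edges[r])
        return seen

    common, missing = set(), set()
    unknown = set(parents)
    sample = set(heads) & unknown  # first round: send all local heads
    while unknown:
        if not sample:
            k = min(samplesize, len(unknown))
            sample = set(random.sample(sorted(unknown), k))
        for rev in sample:
            if remote_known(rev):
                common |= closure(rev, parents)  # its ancestors are common
            else:
                missing |= closure(rev, children)  # its descendants are missing
        unknown -= common | missing
        sample = set()
    return common, missing


# Example: a linear history 0-1-2-3 where the remote only has revs 0 and 1:
#
#     parents = {0: [], 1: [0], 2: [1], 3: [2]}
#     common, missing = _toy_discovery(parents, {3}, lambda r: r <= 1)
#     # -> common == {0, 1} and missing == {2, 3}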
43 from __future__ import absolute_import
43 from __future__ import absolute_import
44
44
45 import collections
45 import collections
46 import random
46 import random
47
47
48 from .i18n import _
48 from .i18n import _
49 from .node import nullrev
49 from .node import nullrev
50 from . import (
50 from . import (
51 error,
51 error,
52 policy,
52 policy,
53 util,
53 util,
54 )
54 )
55
55
56
56
57 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
57 def _updatesample(revs, heads, sample, parentfn, quicksamplesize=0):
58 """update an existing sample to match the expected size
58 """update an existing sample to match the expected size
59
59
60 The sample is updated with revs exponentially distant from each head of the
60 The sample is updated with revs exponentially distant from each head of the
61 <revs> set. (H~1, H~2, H~4, H~8, etc).
61 <revs> set. (H~1, H~2, H~4, H~8, etc).
62
62
63 If a target size is specified, the sampling will stop once this size is
63 If a target size is specified, the sampling will stop once this size is
64 reached. Otherwise sampling will happen until roots of the <revs> set are
64 reached. Otherwise sampling will happen until roots of the <revs> set are
65 reached.
65 reached.
66
66
67 :revs: set of revs we want to discover (if None, assume the whole dag)
67 :revs: set of revs we want to discover (if None, assume the whole dag)
68 :heads: set of DAG head revs
68 :heads: set of DAG head revs
69 :sample: a sample to update
69 :sample: a sample to update
70 :parentfn: a callable to resolve parents for a revision
70 :parentfn: a callable to resolve parents for a revision
71 :quicksamplesize: optional target size of the sample"""
71 :quicksamplesize: optional target size of the sample"""
72 dist = {}
72 dist = {}
73 visit = collections.deque(heads)
73 visit = collections.deque(heads)
74 seen = set()
74 seen = set()
75 factor = 1
75 factor = 1
76 while visit:
76 while visit:
77 curr = visit.popleft()
77 curr = visit.popleft()
78 if curr in seen:
78 if curr in seen:
79 continue
79 continue
80 d = dist.setdefault(curr, 1)
80 d = dist.setdefault(curr, 1)
81 if d > factor:
81 if d > factor:
82 factor *= 2
82 factor *= 2
83 if d == factor:
83 if d == factor:
84 sample.add(curr)
84 sample.add(curr)
85 if quicksamplesize and (len(sample) >= quicksamplesize):
85 if quicksamplesize and (len(sample) >= quicksamplesize):
86 return
86 return
87 seen.add(curr)
87 seen.add(curr)
88
88
89 for p in parentfn(curr):
89 for p in parentfn(curr):
90 if p != nullrev and (not revs or p in revs):
90 if p != nullrev and (not revs or p in revs):
91 dist.setdefault(p, d + 1)
91 dist.setdefault(p, d + 1)
92 visit.append(p)
92 visit.append(p)
93
93
94
94
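# Illustrative example (not part of the module): for a linear chain of revs
# 0..10 with head 10, the distance counter grows 1, 2, 3, ... as we walk
# towards the root, and a rev is kept whenever that counter equals the current
# power-of-two factor, so the sample holds the revs at distances 1, 2, 4, 8:
#
#     parentfn = lambda r: [r - 1] if r > 0 else [nullrev]
#     sample = set()
#     _updatesample(None, {10}, sample, parentfn)
#     # sample == {10, 9, 7, 3}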
95 def _limitsample(sample, desiredlen, randomize=True):
95 def _limitsample(sample, desiredlen, randomize=True):
96 """return a random subset of sample of at most desiredlen item.
96 """return a random subset of sample of at most desiredlen item.
97
97
98 If randomize is False, though, a deterministic subset is returned.
98 If randomize is False, though, a deterministic subset is returned.
99 This is meant for integration tests.
99 This is meant for integration tests.
100 """
100 """
101 if len(sample) <= desiredlen:
101 if len(sample) <= desiredlen:
102 return sample
102 return sample
103 if randomize:
103 if randomize:
104 return set(random.sample(sample, desiredlen))
104 return set(random.sample(sample, desiredlen))
105 sample = list(sample)
105 sample = list(sample)
106 sample.sort()
106 sample.sort()
107 return set(sample[:desiredlen])
107 return set(sample[:desiredlen])
108
108
109
109
110 class partialdiscovery(object):
110 class partialdiscovery(object):
111 """an object representing ongoing discovery
111 """an object representing ongoing discovery
112
112
113 Fed with data from the remote repository, this object keeps track of the
113 Fed with data from the remote repository, this object keeps track of the
114 current set of changesets in various states:
114 current set of changesets in various states:
115
115
116 - common: revs also known remotely
116 - common: revs also known remotely
117 - undecided: revs we don't have information on yet
117 - undecided: revs we don't have information on yet
118 - missing: revs missing remotely
118 - missing: revs missing remotely
119 (all tracked revisions are known locally)
119 (all tracked revisions are known locally)
120 """
120 """
121
121
122 def __init__(self, repo, targetheads, respectsize, randomize=True):
122 def __init__(self, repo, targetheads, respectsize, randomize=True):
123 self._repo = repo
123 self._repo = repo
124 self._targetheads = targetheads
124 self._targetheads = targetheads
125 self._common = repo.changelog.incrementalmissingrevs()
125 self._common = repo.changelog.incrementalmissingrevs()
126 self._undecided = None
126 self._undecided = None
127 self.missing = set()
127 self.missing = set()
128 self._childrenmap = None
128 self._childrenmap = None
129 self._respectsize = respectsize
129 self._respectsize = respectsize
130 self.randomize = randomize
130 self.randomize = randomize
131
131
132 def addcommons(self, commons):
132 def addcommons(self, commons):
133 """register nodes known as common"""
133 """register nodes known as common"""
134 self._common.addbases(commons)
134 self._common.addbases(commons)
135 if self._undecided is not None:
135 if self._undecided is not None:
136 self._common.removeancestorsfrom(self._undecided)
136 self._common.removeancestorsfrom(self._undecided)
137
137
138 def addmissings(self, missings):
138 def addmissings(self, missings):
139 """register some nodes as missing"""
139 """register some nodes as missing"""
140 newmissing = self._repo.revs(b'%ld::%ld', missings, self.undecided)
140 newmissing = self._repo.revs(b'%ld::%ld', missings, self.undecided)
141 if newmissing:
141 if newmissing:
142 self.missing.update(newmissing)
142 self.missing.update(newmissing)
143 self.undecided.difference_update(newmissing)
143 self.undecided.difference_update(newmissing)
144
144
145 def addinfo(self, sample):
145 def addinfo(self, sample):
146 """consume an iterable of (rev, known) tuples"""
146 """consume an iterable of (rev, known) tuples"""
147 common = set()
147 common = set()
148 missing = set()
148 missing = set()
149 for rev, known in sample:
149 for rev, known in sample:
150 if known:
150 if known:
151 common.add(rev)
151 common.add(rev)
152 else:
152 else:
153 missing.add(rev)
153 missing.add(rev)
154 if common:
154 if common:
155 self.addcommons(common)
155 self.addcommons(common)
156 if missing:
156 if missing:
157 self.addmissings(missing)
157 self.addmissings(missing)
158
158
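# Illustrative sketch only (not Mercurial code): addinfo() is how answers from
# the remote's known() command are fed back in, closing the loop described in
# the module docstring.  A hypothetical round, with `disco` a partialdiscovery,
# `repo` the local repository, `headrevs` its head revisions and `remote` a
# peer object:
#
#     sample = list(disco.takequicksample(headrevs, size=100))
#     yesno = remote.known([repo.changelog.node(r) for r in sample])
#     disco.addinfo(zip(sample, yesno))
#     if disco.iscomplete():
#         commonheads = disco.commonheads()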
159 def hasinfo(self):
159 def hasinfo(self):
160 """return True is we have any clue about the remote state"""
160 """return True is we have any clue about the remote state"""
161 return self._common.hasbases()
161 return self._common.hasbases()
162
162
163 def iscomplete(self):
163 def iscomplete(self):
164 """True if all the necessary data have been gathered"""
164 """True if all the necessary data have been gathered"""
165 return self._undecided is not None and not self._undecided
165 return self._undecided is not None and not self._undecided
166
166
167 @property
167 @property
168 def undecided(self):
168 def undecided(self):
169 if self._undecided is not None:
169 if self._undecided is not None:
170 return self._undecided
170 return self._undecided
171 self._undecided = set(self._common.missingancestors(self._targetheads))
171 self._undecided = set(self._common.missingancestors(self._targetheads))
172 return self._undecided
172 return self._undecided
173
173
174 def stats(self):
174 def stats(self):
175 return {
175 return {
176 'undecided': len(self.undecided),
176 'undecided': len(self.undecided),
177 }
177 }
178
178
179 def commonheads(self):
179 def commonheads(self):
180 """the heads of the known common set"""
180 """the heads of the known common set"""
181 # heads(common) == heads(common.bases) since common represents
181 # heads(common) == heads(common.bases) since common represents
182 # common.bases and all its ancestors
182 # common.bases and all its ancestors
183 return self._common.basesheads()
183 return self._common.basesheads()
184
184
185 def _parentsgetter(self):
185 def _parentsgetter(self):
186 getrev = self._repo.changelog.index.__getitem__
186 getrev = self._repo.changelog.index.__getitem__
187
187
188 def getparents(r):
188 def getparents(r):
189 return getrev(r)[5:7]
189 return getrev(r)[5:7]
190
190
191 return getparents
191 return getparents
192
192
193 def _childrengetter(self):
193 def _childrengetter(self):
194
194
195 if self._childrenmap is not None:
195 if self._childrenmap is not None:
196 # During discovery, the `undecided` set keeps shrinking.
196 # During discovery, the `undecided` set keeps shrinking.
197 # Therefore, the map computed for an iteration N will be
197 # Therefore, the map computed for an iteration N will be
198 # valid for iteration N+1. Instead of computing the same
198 # valid for iteration N+1. Instead of computing the same
199 # data over and over, we cache it the first time.
199 # data over and over, we cache it the first time.
200 return self._childrenmap.__getitem__
200 return self._childrenmap.__getitem__
201
201
202 # _updatesample() essentially iterates over revisions to look
202 # _updatesample() essentially iterates over revisions to look
203 # up their children. This lookup is expensive and doing it in a loop is
203 # up their children. This lookup is expensive and doing it in a loop is
204 # quadratic. We precompute the children for all relevant revisions and
204 # quadratic. We precompute the children for all relevant revisions and
205 # make the lookup in _updatesample() a simple dict lookup.
205 # make the lookup in _updatesample() a simple dict lookup.
206 self._childrenmap = children = {}
206 self._childrenmap = children = {}
207
207
208 parentrevs = self._parentsgetter()
208 parentrevs = self._parentsgetter()
209 revs = self.undecided
209 revs = self.undecided
210
210
211 for rev in sorted(revs):
211 for rev in sorted(revs):
212 # Always ensure revision has an entry so we don't need to worry
212 # Always ensure revision has an entry so we don't need to worry
213 # about missing keys.
213 # about missing keys.
214 children[rev] = []
214 children[rev] = []
215 for prev in parentrevs(rev):
215 for prev in parentrevs(rev):
216 if prev == nullrev:
216 if prev == nullrev:
217 continue
217 continue
218 c = children.get(prev)
218 c = children.get(prev)
219 if c is not None:
219 if c is not None:
220 c.append(rev)
220 c.append(rev)
221 return children.__getitem__
221 return children.__getitem__
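As a standalone sketch of the precomputation described in the comment above, here is the same children-map construction over a tiny hand-made DAG; the revisions, parents, and the `undecided` set are invented for the example:

    nullrev = -1
    # toy DAG: rev -> (p1, p2)
    parents = {0: (nullrev, nullrev), 1: (0, nullrev), 2: (0, nullrev), 3: (1, 2)}
    undecided = {1, 2, 3}

    children = {}
    for rev in sorted(undecided):
        children[rev] = []              # every relevant rev gets an entry up front
        for prev in parents[rev]:
            if prev == nullrev:
                continue
            c = children.get(prev)      # parents outside `undecided` stay absent
            if c is not None:
                c.append(rev)
    # children == {1: [3], 2: [3], 3: []}, so lookups in _updatesample() are O(1)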
222
222
223 def takequicksample(self, headrevs, size):
223 def takequicksample(self, headrevs, size):
224 """takes a quick sample of size <size>
224 """takes a quick sample of size <size>
225
225
226 It is meant for initial sampling and focuses on querying heads and close
226 It is meant for initial sampling and focuses on querying heads and close
227 ancestors of heads.
227 ancestors of heads.
228
228
229 :headrevs: set of head revisions in local DAG to consider
229 :headrevs: set of head revisions in local DAG to consider
230 :size: the maximum size of the sample"""
230 :size: the maximum size of the sample"""
231 revs = self.undecided
231 revs = self.undecided
232 if len(revs) <= size:
232 if len(revs) <= size:
233 return list(revs)
233 return list(revs)
234 sample = set(self._repo.revs(b'heads(%ld)', revs))
234 sample = set(self._repo.revs(b'heads(%ld)', revs))
235
235
236 if len(sample) >= size:
236 if len(sample) >= size:
237 return _limitsample(sample, size, randomize=self.randomize)
237 return _limitsample(sample, size, randomize=self.randomize)
238
238
239 _updatesample(
239 _updatesample(
240 None, headrevs, sample, self._parentsgetter(), quicksamplesize=size
240 None, headrevs, sample, self._parentsgetter(), quicksamplesize=size
241 )
241 )
242 return sample
242 return sample
243
243
244 def takefullsample(self, headrevs, size):
244 def takefullsample(self, headrevs, size):
245 revs = self.undecided
245 revs = self.undecided
246 if len(revs) <= size:
246 if len(revs) <= size:
247 return list(revs)
247 return list(revs)
248 repo = self._repo
248 repo = self._repo
249 sample = set(repo.revs(b'heads(%ld)', revs))
249 sample = set(repo.revs(b'heads(%ld)', revs))
250 parentrevs = self._parentsgetter()
250 parentrevs = self._parentsgetter()
251
251
252 # update from heads
252 # update from heads
253 revsheads = sample.copy()
253 revsheads = sample.copy()
254 _updatesample(revs, revsheads, sample, parentrevs)
254 _updatesample(revs, revsheads, sample, parentrevs)
255
255
256 # update from roots
256 # update from roots
257 revsroots = set(repo.revs(b'roots(%ld)', revs))
257 revsroots = set(repo.revs(b'roots(%ld)', revs))
258 childrenrevs = self._childrengetter()
258 childrenrevs = self._childrengetter()
259 _updatesample(revs, revsroots, sample, childrenrevs)
259 _updatesample(revs, revsroots, sample, childrenrevs)
260 assert sample
260 assert sample
261
261
262 if not self._respectsize:
262 if not self._respectsize:
263 size = max(size, min(len(revsroots), len(revsheads)))
263 size = max(size, min(len(revsroots), len(revsheads)))
264
264
265 sample = _limitsample(sample, size, randomize=self.randomize)
265 sample = _limitsample(sample, size, randomize=self.randomize)
266 if len(sample) < size:
266 if len(sample) < size:
267 more = size - len(sample)
267 more = size - len(sample)
268 takefrom = list(revs - sample)
268 takefrom = list(revs - sample)
269 if self.randomize:
269 if self.randomize:
270 sample.update(random.sample(takefrom, more))
270 sample.update(random.sample(takefrom, more))
271 else:
271 else:
272 takefrom.sort()
272 takefrom.sort()
273 sample.update(takefrom[:more])
273 sample.update(takefrom[:more])
274 return sample
274 return sample
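The tail of takefullsample() tops the sample up to the requested size from the revisions not picked by the heads/roots walks, either randomly or deterministically. A condensed, self-contained sketch of just that top-up step, with invented inputs:

    import random

    revs = set(range(20))        # hypothetical undecided revisions
    sample = {0, 7, 19}          # revisions already picked from heads and roots
    size, randomize = 6, True

    if len(sample) < size:
        more = size - len(sample)
        takefrom = list(revs - sample)
        if randomize:
            sample.update(random.sample(takefrom, more))
        else:
            takefrom.sort()                  # deterministic order, useful for tests
            sample.update(takefrom[:more])
    # sample now contains exactly `size` revisions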
275
275
276
276
277 pure_partialdiscovery = partialdiscovery
278
277 partialdiscovery = policy.importrust(
279 partialdiscovery = policy.importrust(
278 'discovery', member='PartialDiscovery', default=partialdiscovery
280 'discovery', member='PartialDiscovery', default=partialdiscovery
279 )
281 )
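The two added lines above keep the pure-Python class reachable as pure_partialdiscovery before the partialdiscovery name is rebound to the optional Rust-backed implementation; findcommonheads() below then picks one or the other based on cl.index.rust_ext_compat. A generic, self-contained sketch of this fallback-and-select pattern (module and class names are hypothetical, and a plain try/except import stands in for policy.importrust):

    class PureEngine(object):
        """pure-Python implementation, always available"""

    try:
        # optional native implementation; silently fall back when unavailable
        from native_engine import Engine as NativeEngine  # hypothetical module
    except ImportError:
        NativeEngine = PureEngine

    def make_engine(native_compatible):
        # callers whose data the native code cannot handle pick the pure class,
        # mirroring the rust_ext_compat check in findcommonheads()
        cls = NativeEngine if native_compatible else PureEngine
        return cls()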
280
282
281
283
282 def findcommonheads(
284 def findcommonheads(
283 ui,
285 ui,
284 local,
286 local,
285 remote,
287 remote,
286 abortwhenunrelated=True,
288 abortwhenunrelated=True,
287 ancestorsof=None,
289 ancestorsof=None,
288 audit=None,
290 audit=None,
289 ):
291 ):
290 """Return a tuple (common, anyincoming, remoteheads) used to identify
292 """Return a tuple (common, anyincoming, remoteheads) used to identify
291 missing nodes from or in remote.
293 missing nodes from or in remote.
292
294
293 The audit argument is an optional dictionary that a caller can pass. It
295 The audit argument is an optional dictionary that a caller can pass. It
294 will be updated with extra data about the discovery, which is useful for
296 will be updated with extra data about the discovery, which is useful for
295 debugging.
297 debugging.
296 """
298 """
297
299
298 samplegrowth = float(ui.config(b'devel', b'discovery.grow-sample.rate'))
300 samplegrowth = float(ui.config(b'devel', b'discovery.grow-sample.rate'))
299
301
300 start = util.timer()
302 start = util.timer()
301
303
302 roundtrips = 0
304 roundtrips = 0
303 cl = local.changelog
305 cl = local.changelog
304 clnode = cl.node
306 clnode = cl.node
305 clrev = cl.rev
307 clrev = cl.rev
306
308
307 if ancestorsof is not None:
309 if ancestorsof is not None:
308 ownheads = [clrev(n) for n in ancestorsof]
310 ownheads = [clrev(n) for n in ancestorsof]
309 else:
311 else:
310 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
312 ownheads = [rev for rev in cl.headrevs() if rev != nullrev]
311
313
312 initial_head_exchange = ui.configbool(b'devel', b'discovery.exchange-heads')
314 initial_head_exchange = ui.configbool(b'devel', b'discovery.exchange-heads')
313 initialsamplesize = ui.configint(b'devel', b'discovery.sample-size.initial')
315 initialsamplesize = ui.configint(b'devel', b'discovery.sample-size.initial')
314 fullsamplesize = ui.configint(b'devel', b'discovery.sample-size')
316 fullsamplesize = ui.configint(b'devel', b'discovery.sample-size')
315 # We also ask remote about all the local heads. That set can be arbitrarily
317 # We also ask remote about all the local heads. That set can be arbitrarily
316 # large, so we used to limit its size to `initialsamplesize`. We no longer
318 # large, so we used to limit its size to `initialsamplesize`. We no longer
317 # do so, as it proved counterproductive. The skipped heads could lead to a
319 # do so, as it proved counterproductive. The skipped heads could lead to a
318 # large "undecided" set, slower to be clarified than if we asked the
320 # large "undecided" set, slower to be clarified than if we asked the
319 # question for all heads right away.
321 # question for all heads right away.
320 #
322 #
321 # We are already fetching all server heads using the `heads` command, so
323 # We are already fetching all server heads using the `heads` command, so
322 # sending an equivalent number of heads the other way should not have a
324 # sending an equivalent number of heads the other way should not have a
323 # significant impact. In addition, it is very likely that we are going to
325 # significant impact. In addition, it is very likely that we are going to
324 # have to issue "known" requests for an equivalent number of revisions in
326 # have to issue "known" requests for an equivalent number of revisions in
325 # order to decide whether these heads are common or missing.
327 # order to decide whether these heads are common or missing.
326 #
328 #
327 # A detailed analysis can be found below.
329 # A detailed analysis can be found below.
328 #
330 #
329 # Case A: local and server both have few heads
331 # Case A: local and server both have few heads
330 #
332 #
331 # Ownheads is below initialsamplesize; the limit would not have any effect.
333 # Ownheads is below initialsamplesize; the limit would not have any effect.
332 #
334 #
333 # Case B: local has few heads and server has many
335 # Case B: local has few heads and server has many
334 #
336 #
335 # Ownheads is below initialsamplesize; the limit would not have any effect.
337 # Ownheads is below initialsamplesize; the limit would not have any effect.
336 #
338 #
337 # Case C: local and server both have many heads
339 # Case C: local and server both have many heads
338 #
340 #
339 # We now transfer some more data, but not significantly more than is
341 # We now transfer some more data, but not significantly more than is
340 # already transferred to carry the server heads.
342 # already transferred to carry the server heads.
341 #
343 #
342 # Case D: local has many heads, server has few
344 # Case D: local has many heads, server has few
343 #
345 #
344 # D.1 local heads are mostly known remotely
346 # D.1 local heads are mostly known remotely
345 #
347 #
346 # All the known head will have be part of a `known` request at some
348 # All the known head will have be part of a `known` request at some
347 # point for the discovery to finish. Sending them all earlier is
349 # point for the discovery to finish. Sending them all earlier is
348 # actually helping.
350 # actually helping.
349 #
351 #
350 # (This case is fairly unlikely; it requires the numerous heads to all
352 # (This case is fairly unlikely; it requires the numerous heads to all
351 # be merged server side into only a few heads)
353 # be merged server side into only a few heads)
352 #
354 #
353 # D.2 local heads are mostly missing remotely
355 # D.2 local heads are mostly missing remotely
354 #
356 #
355 # To determine that the heads are missing, we'll have to issue `known`
357 # To determine that the heads are missing, we'll have to issue `known`
356 # requests for them or one of their ancestors. The number of `known`
358 # requests for them or one of their ancestors. The number of `known`
357 # requests will likely be in the same order of magnitude as the number
359 # requests will likely be in the same order of magnitude as the number
358 # of local heads.
360 # of local heads.
359 #
361 #
360 # The only case where we can be more efficient using `known` requests on
362 # The only case where we can be more efficient using `known` requests on
361 # ancestors is the case where all the "missing" local heads are based on a
363 # ancestors is the case where all the "missing" local heads are based on a
362 # few changesets, also "missing". This means we would have a "complex"
364 # few changesets, also "missing". This means we would have a "complex"
363 # graph (with many heads) attached to, but largely independent of, the
365 # graph (with many heads) attached to, but largely independent of, the
364 # "simple" graph on the server. This is a fairly unusual case and has
366 # "simple" graph on the server. This is a fairly unusual case and has
365 # not been met in the wild so far.
367 # not been met in the wild so far.
366 if initial_head_exchange:
368 if initial_head_exchange:
367 if remote.limitedarguments:
369 if remote.limitedarguments:
368 sample = _limitsample(ownheads, initialsamplesize)
370 sample = _limitsample(ownheads, initialsamplesize)
369 # indices between sample and externalized version must match
371 # indices between sample and externalized version must match
370 sample = list(sample)
372 sample = list(sample)
371 else:
373 else:
372 sample = ownheads
374 sample = ownheads
373
375
374 ui.debug(b"query 1; heads\n")
376 ui.debug(b"query 1; heads\n")
375 roundtrips += 1
377 roundtrips += 1
376 with remote.commandexecutor() as e:
378 with remote.commandexecutor() as e:
377 fheads = e.callcommand(b'heads', {})
379 fheads = e.callcommand(b'heads', {})
378 fknown = e.callcommand(
380 fknown = e.callcommand(
379 b'known',
381 b'known',
380 {
382 {
381 b'nodes': [clnode(r) for r in sample],
383 b'nodes': [clnode(r) for r in sample],
382 },
384 },
383 )
385 )
384
386
385 srvheadhashes, yesno = fheads.result(), fknown.result()
387 srvheadhashes, yesno = fheads.result(), fknown.result()
386
388
387 if audit is not None:
389 if audit is not None:
388 audit[b'total-roundtrips'] = 1
390 audit[b'total-roundtrips'] = 1
389
391
390 if cl.tiprev() == nullrev:
392 if cl.tiprev() == nullrev:
391 if srvheadhashes != [cl.nullid]:
393 if srvheadhashes != [cl.nullid]:
392 return [cl.nullid], True, srvheadhashes
394 return [cl.nullid], True, srvheadhashes
393 return [cl.nullid], False, []
395 return [cl.nullid], False, []
394 else:
396 else:
395 # we still need the remote head for the function return
397 # we still need the remote head for the function return
396 with remote.commandexecutor() as e:
398 with remote.commandexecutor() as e:
397 fheads = e.callcommand(b'heads', {})
399 fheads = e.callcommand(b'heads', {})
398 srvheadhashes = fheads.result()
400 srvheadhashes = fheads.result()
399
401
400 # start actual discovery (we note this before the next "if" for
402 # start actual discovery (we note this before the next "if" for
401 # compatibility reasons)
403 # compatibility reasons)
402 ui.status(_(b"searching for changes\n"))
404 ui.status(_(b"searching for changes\n"))
403
405
404 knownsrvheads = [] # revnos of remote heads that are known locally
406 knownsrvheads = [] # revnos of remote heads that are known locally
405 for node in srvheadhashes:
407 for node in srvheadhashes:
406 if node == cl.nullid:
408 if node == cl.nullid:
407 continue
409 continue
408
410
409 try:
411 try:
410 knownsrvheads.append(clrev(node))
412 knownsrvheads.append(clrev(node))
411 # Catches unknown and filtered nodes.
413 # Catches unknown and filtered nodes.
412 except error.LookupError:
414 except error.LookupError:
413 continue
415 continue
414
416
415 if initial_head_exchange:
417 if initial_head_exchange:
416 # early exit if we know all the specified remote heads already
418 # early exit if we know all the specified remote heads already
417 if len(knownsrvheads) == len(srvheadhashes):
419 if len(knownsrvheads) == len(srvheadhashes):
418 ui.debug(b"all remote heads known locally\n")
420 ui.debug(b"all remote heads known locally\n")
419 return srvheadhashes, False, srvheadhashes
421 return srvheadhashes, False, srvheadhashes
420
422
421 if len(sample) == len(ownheads) and all(yesno):
423 if len(sample) == len(ownheads) and all(yesno):
422 ui.note(_(b"all local changesets known remotely\n"))
424 ui.note(_(b"all local changesets known remotely\n"))
423 ownheadhashes = [clnode(r) for r in ownheads]
425 ownheadhashes = [clnode(r) for r in ownheads]
424 return ownheadhashes, True, srvheadhashes
426 return ownheadhashes, True, srvheadhashes
425
427
426 # full blown discovery
428 # full blown discovery
427
429
428 # if the server has a limit on its argument size, we can't grow the sample.
430 # if the server has a limit on its argument size, we can't grow the sample.
429 configbool = local.ui.configbool
431 configbool = local.ui.configbool
430 grow_sample = configbool(b'devel', b'discovery.grow-sample')
432 grow_sample = configbool(b'devel', b'discovery.grow-sample')
431 grow_sample = grow_sample and not remote.limitedarguments
433 grow_sample = grow_sample and not remote.limitedarguments
432
434
433 dynamic_sample = configbool(b'devel', b'discovery.grow-sample.dynamic')
435 dynamic_sample = configbool(b'devel', b'discovery.grow-sample.dynamic')
434 hard_limit_sample = not (dynamic_sample or remote.limitedarguments)
436 hard_limit_sample = not (dynamic_sample or remote.limitedarguments)
435
437
436 randomize = ui.configbool(b'devel', b'discovery.randomize')
438 randomize = ui.configbool(b'devel', b'discovery.randomize')
437 disco = partialdiscovery(
439 if cl.index.rust_ext_compat:
438 local, ownheads, hard_limit_sample, randomize=randomize
440 pd = partialdiscovery
439 )
441 else:
442 pd = pure_partialdiscovery
443 disco = pd(local, ownheads, hard_limit_sample, randomize=randomize)
440 if initial_head_exchange:
444 if initial_head_exchange:
441 # treat remote heads (and maybe own heads) as a first implicit sample
445 # treat remote heads (and maybe own heads) as a first implicit sample
442 # response
446 # response
443 disco.addcommons(knownsrvheads)
447 disco.addcommons(knownsrvheads)
444 disco.addinfo(zip(sample, yesno))
448 disco.addinfo(zip(sample, yesno))
445
449
446 full = not initial_head_exchange
450 full = not initial_head_exchange
447 progress = ui.makeprogress(_(b'searching'), unit=_(b'queries'))
451 progress = ui.makeprogress(_(b'searching'), unit=_(b'queries'))
448 while not disco.iscomplete():
452 while not disco.iscomplete():
449
453
450 if full or disco.hasinfo():
454 if full or disco.hasinfo():
451 if full:
455 if full:
452 ui.note(_(b"sampling from both directions\n"))
456 ui.note(_(b"sampling from both directions\n"))
453 else:
457 else:
454 ui.debug(b"taking initial sample\n")
458 ui.debug(b"taking initial sample\n")
455 samplefunc = disco.takefullsample
459 samplefunc = disco.takefullsample
456 targetsize = fullsamplesize
460 targetsize = fullsamplesize
457 if grow_sample:
461 if grow_sample:
458 fullsamplesize = int(fullsamplesize * samplegrowth)
462 fullsamplesize = int(fullsamplesize * samplegrowth)
459 else:
463 else:
460 # use even cheaper initial sample
464 # use even cheaper initial sample
461 ui.debug(b"taking quick initial sample\n")
465 ui.debug(b"taking quick initial sample\n")
462 samplefunc = disco.takequicksample
466 samplefunc = disco.takequicksample
463 targetsize = initialsamplesize
467 targetsize = initialsamplesize
464 sample = samplefunc(ownheads, targetsize)
468 sample = samplefunc(ownheads, targetsize)
465
469
466 roundtrips += 1
470 roundtrips += 1
467 progress.update(roundtrips)
471 progress.update(roundtrips)
468 stats = disco.stats()
472 stats = disco.stats()
469 ui.debug(
473 ui.debug(
470 b"query %i; still undecided: %i, sample size is: %i\n"
474 b"query %i; still undecided: %i, sample size is: %i\n"
471 % (roundtrips, stats['undecided'], len(sample))
475 % (roundtrips, stats['undecided'], len(sample))
472 )
476 )
473
477
474 # indices between sample and externalized version must match
478 # indices between sample and externalized version must match
475 sample = list(sample)
479 sample = list(sample)
476
480
477 with remote.commandexecutor() as e:
481 with remote.commandexecutor() as e:
478 yesno = e.callcommand(
482 yesno = e.callcommand(
479 b'known',
483 b'known',
480 {
484 {
481 b'nodes': [clnode(r) for r in sample],
485 b'nodes': [clnode(r) for r in sample],
482 },
486 },
483 ).result()
487 ).result()
484
488
485 full = True
489 full = True
486
490
487 disco.addinfo(zip(sample, yesno))
491 disco.addinfo(zip(sample, yesno))
488
492
489 result = disco.commonheads()
493 result = disco.commonheads()
490 elapsed = util.timer() - start
494 elapsed = util.timer() - start
491 progress.complete()
495 progress.complete()
492 ui.debug(b"%d total queries in %.4fs\n" % (roundtrips, elapsed))
496 ui.debug(b"%d total queries in %.4fs\n" % (roundtrips, elapsed))
493 msg = (
497 msg = (
494 b'found %d common and %d unknown server heads,'
498 b'found %d common and %d unknown server heads,'
495 b' %d roundtrips in %.4fs\n'
499 b' %d roundtrips in %.4fs\n'
496 )
500 )
497 missing = set(result) - set(knownsrvheads)
501 missing = set(result) - set(knownsrvheads)
498 ui.log(b'discovery', msg, len(result), len(missing), roundtrips, elapsed)
502 ui.log(b'discovery', msg, len(result), len(missing), roundtrips, elapsed)
499
503
500 if audit is not None:
504 if audit is not None:
501 audit[b'total-roundtrips'] = roundtrips
505 audit[b'total-roundtrips'] = roundtrips
502
506
503 if not result and srvheadhashes != [cl.nullid]:
507 if not result and srvheadhashes != [cl.nullid]:
504 if abortwhenunrelated:
508 if abortwhenunrelated:
505 raise error.Abort(_(b"repository is unrelated"))
509 raise error.Abort(_(b"repository is unrelated"))
506 else:
510 else:
507 ui.warn(_(b"warning: repository is unrelated\n"))
511 ui.warn(_(b"warning: repository is unrelated\n"))
508 return (
512 return (
509 {cl.nullid},
513 {cl.nullid},
510 True,
514 True,
511 srvheadhashes,
515 srvheadhashes,
512 )
516 )
513
517
514 anyincoming = srvheadhashes != [cl.nullid]
518 anyincoming = srvheadhashes != [cl.nullid]
515 result = {clnode(r) for r in result}
519 result = {clnode(r) for r in result}
516 return result, anyincoming, srvheadhashes
520 return result, anyincoming, srvheadhashes
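A hedged usage sketch of findcommonheads() (defined in mercurial/setdiscovery.py) and its optional audit dictionary, assuming `repo` is an existing local repository and `peer` an already-connected remote peer (neither is constructed here):

    from mercurial import setdiscovery

    audit = {}
    common, anyincoming, remoteheads = setdiscovery.findcommonheads(
        repo.ui, repo, peer, abortwhenunrelated=False, audit=audit
    )
    # the audit dict now carries at least the number of wire roundtrips
    repo.ui.debug(b'discovery took %d roundtrips\n' % audit[b'total-roundtrips'])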