revlog: document the `clearcaches` method...
marmoute
r51899:6c3798b4 default
@@ -1,3550 +1,3551 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
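
# Illustrative sketch: a flag processor is a (read, write, raw) triple like
# `ellipsisprocessor` above. A hypothetical extension owning a REVIDX_MYFLAG
# bit could register one roughly like so; `REVIDX_MYFLAG` and the transform
# helpers are assumptions, not part of this module:
#
#     def myreadprocessor(rl, text):
#         return decode_on_read(text), False  # False: skip hash validation
#
#     def mywriteprocessor(rl, text):
#         return encode_on_write(text), False
#
#     def myrawprocessor(rl, text):
#         return False  # rawtext is not safe to verify against the hash
#
#     flagutil.addflagprocessor(
#         REVIDX_MYFLAG, (myreadprocessor, mywriteprocessor, myrawprocessor)
#     )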


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider that we have a "fast" implementation in "pure" python,
# because people using pure don't really have performance considerations
# (and a wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
245 """
245 """
246 the underlying revision storage object
246 the underlying revision storage object
247
247
248 A revlog consists of two parts, an index and the revision data.
248 A revlog consists of two parts, an index and the revision data.
249
249
250 The index is a file with a fixed record size containing
250 The index is a file with a fixed record size containing
251 information on each revision, including its nodeid (hash), the
251 information on each revision, including its nodeid (hash), the
252 nodeids of its parents, the position and offset of its data within
252 nodeids of its parents, the position and offset of its data within
253 the data file, and the revision it's based on. Finally, each entry
253 the data file, and the revision it's based on. Finally, each entry
254 contains a linkrev entry that can serve as a pointer to external
254 contains a linkrev entry that can serve as a pointer to external
255 data.
255 data.
256
256
257 The revision data itself is a linear collection of data chunks.
257 The revision data itself is a linear collection of data chunks.
258 Each chunk represents a revision and is usually represented as a
258 Each chunk represents a revision and is usually represented as a
259 delta against the previous chunk. To bound lookup time, runs of
259 delta against the previous chunk. To bound lookup time, runs of
260 deltas are limited to about 2 times the length of the original
260 deltas are limited to about 2 times the length of the original
261 version data. This makes retrieval of a version proportional to
261 version data. This makes retrieval of a version proportional to
262 its size, or O(1) relative to the number of revisions.
262 its size, or O(1) relative to the number of revisions.
263
263
264 Both pieces of the revlog are written to in an append-only
264 Both pieces of the revlog are written to in an append-only
265 fashion, which means we never need to rewrite a file to insert or
265 fashion, which means we never need to rewrite a file to insert or
266 remove data, and can use some simple techniques to avoid the need
266 remove data, and can use some simple techniques to avoid the need
267 for locking while reading.
267 for locking while reading.
268
268
269 If checkambig, indexfile is opened with checkambig=True at
269 If checkambig, indexfile is opened with checkambig=True at
270 writing, to avoid file stat ambiguity.
270 writing, to avoid file stat ambiguity.
271
271
272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 index will be mmapped rather than read if it is larger than the
273 index will be mmapped rather than read if it is larger than the
274 configured threshold.
274 configured threshold.
275
275
276 If censorable is True, the revlog can have censored revisions.
276 If censorable is True, the revlog can have censored revisions.
277
277
278 If `upperboundcomp` is not None, this is the expected maximal gain from
278 If `upperboundcomp` is not None, this is the expected maximal gain from
279 compression for the data content.
279 compression for the data content.
280
280
281 `concurrencychecker` is an optional function that receives 3 arguments: a
281 `concurrencychecker` is an optional function that receives 3 arguments: a
282 file handle, a filename, and an expected position. It should check whether
282 file handle, a filename, and an expected position. It should check whether
283 the current position in the file handle is valid, and log/warn/fail (by
283 the current position in the file handle is valid, and log/warn/fail (by
284 raising).
284 raising).
285
285
286 See mercurial/revlogutils/contants.py for details about the content of an
286 See mercurial/revlogutils/contants.py for details about the content of an
287 index entry.
287 index entry.
288 """
288 """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)
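
    # Worked example: the low 16 bits of the unpacked header hold the format
    # version and the high bits hold the flags, so for a typical inline
    # revlogv1 header:
    #
    #     header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    #     header & 0xFFFF    # -> REVLOGV1
    #     header & ~0xFFFF   # -> FLAG_INLINE_DATA | FLAG_GENERALDELTA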

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but test, debug, or performance measurement code might not
        set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as a flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order
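
    # Illustrative example: the `target` tuple documented above could be,
    # say, (KIND_FILELOG, b'path/to/file') for a filelog; `display_id` below
    # uses that second element to show the tracked path in messages. The
    # exact ID used for other kinds is not shown here, and the path is a
    # hypothetical placeholder:
    #
    #     target = (KIND_FILELOG, b'path/to/file')
    #     assert target[0] in ALL_KINDS and len(target) == 2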

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
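
    # Worked example: with opener options such as
    # {b'revlogv1': True, b'generaldelta': True}, the selection above yields
    #
    #     new_header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    #
    # i.e. a new revlog starts out inline and uses generaldelta; it is split
    # into separate index/data files later, once it outgrows _maxinline.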

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''
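
    # Usage note: _loadindex below reads the index through this helper, e.g.
    #
    #     entry_data = self._get_data(entry_point, mmapindexthreshold)
    #
    # so that, when mmaplargeindex is enabled and the file meets the
    # threshold, the index comes back as a buffer over an mmap instead of a
    # fully materialized bytes object.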

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-to-byte copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

        [
            (filename, bytes_stream, stream_size),
            …
        ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, e.g. by having a way to
        # pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self._datafp() as fp_d:
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]
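
    # Consumption sketch (`send` is a hypothetical transport helper): each
    # returned stream has already had its leading `None` consumed, so
    # iterating it yields exactly `size` bytes of file content:
    #
    #     for name, stream, size in rl.get_streams(max_linkrev):
    #         for chunk in stream:
    #             send(name, chunk)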

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog storage"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor
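
    # Background note: `t` is the compression header byte(s) that prefix a
    # stored chunk. zlib-compressed chunks, for instance, start with b'x'
    # (the usual zlib magic), so
    #
    #     util.compengines.forrevlogheader(b'x')
    #
    # resolves to the zlib engine; uncompressed chunks are typically
    # special-cased by callers before this lookup is needed.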

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())
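
    # Example: for an empty revlog, len(self.index) is 0, so tiprev() is -1
    # (nullrev) and tip() resolves to the null node:
    #
    #     assert rl.tiprev() == nullrev
    #     assert rl.tip() == rl.nullid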

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)
941
942
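    # Illustrative usage sketch (not part of this changeset): clearcaches()
    # is the kind of hook perf tooling calls between timing runs so each
    # read starts cold. Assuming a revlog instance `rlog`, a hypothetical
    # helper could look like:
    #
    #     def time_cold_read(rlog, rev):
    #         rlog.clearcaches()         # drop revision/chain/nodemap caches
    #         return rlog.revision(rev)  # rebuilds the delta chain from disk
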
    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

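    # Worked example of the packing above (illustrative values only): a
    # revision stored at byte offset 0x1234 with flag bit 0x0001 set is
    # recorded as a single integer, and the two accessors split it back:
    #
    #     packed = (0x1234 << 16) | 0x0001
    #     assert packed >> 16 == 0x1234     # what start(rev) returns
    #     assert packed & 0xFFFF == 0x0001  # what flags(rev) returns
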
    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

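    # Sketch of the fast-path mask above with made-up flag values: if the
    # known-flag universe were 0b0111 and ELLIPSIS were 0b0010, the XOR
    # would clear the ELLIPSIS bit from the mask, so a revision carrying
    # only ELLIPSIS still passes the `flags & mask == 0` test and takes the
    # cheap rawsize() path:
    #
    #     KNOWN, ELLIPSIS = 0b0111, 0b0010
    #     mask = KNOWN ^ ELLIPSIS       # == 0b0101
    #     assert (ELLIPSIS & mask) == 0
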
    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

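    # Worked example for ranks (illustrative): in a linear history
    # 0 <- 1 <- 2, ancestors(2) == {0, 1, 2}, so rank(2) == 3. For a merge
    # m of p1 and p2, rank(m) == len(ancestors(p1) | ancestors(p2)) + 1,
    # which is why a rank cannot be derived from the parents' ranks alone
    # and has to be persisted.
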
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

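    # Illustrative walk (made-up index data): with general delta, if rev 5
    # has delta base 3 and rev 3 is a full snapshot (its base field points
    # at itself... here, rev 0's text is the snapshot base), then:
    #
    #     _deltachain(5)            -> ([0, 3, 5], False)
    #     _deltachain(5, stoprev=3) -> ([5], True)
    #
    # i.e. the caller applies the deltas for the listed revs in ascending
    # order, or starts from stoprev's text when `stopped` is True.
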
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

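    # Worked example (illustrative graph): with revisions 0 <- 1 <- 2 and a
    # side branch 0 <- 3, findcommonmissing(common=[node(1)]) builds
    # `has` = ::1 = {null, 0, 1} lazily, then walks from the heads {2, 3}:
    # 2 is missing (its parent 1 is in `has`), 3 is missing (its parent 0
    # is in `has`), giving (::1, [node(2), node(3)]) in revision order.
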
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

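    # Usage sketch for the incremental API (illustrative; `rlog`, `tip_a`
    # and `tip_b` are hypothetical, method names per mercurial.ancestor):
    # discovery code reuses one object and feeds it newly-learned common
    # revisions rather than recomputing the missing set from scratch.
    #
    #     inc = rlog.incrementalmissingrevs(common=[0])
    #     first = inc.missingancestors([tip_a])  # missing wrt common so far
    #     inc.addbases([5])                      # more revs found common
    #     rest = inc.missingancestors([tip_b])   # incremental follow-up
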
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

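    # Worked example (illustrative): in the graph 0 <- 1 <- 2 with a branch
    # 1 <- 3, nodesbetween(roots=[node(1)], heads=[node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]): rev 3 is a descendant of
    # the root but not an ancestor of the requested head, so it is excluded.
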
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

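    # Worked pass of the marking above (illustrative): for 0 <- 1 <- 2 plus
    # a branch 0 <- 3, each rev first marks itself, then clears its parents:
    #
    #     r=0: ishead = [1, 0, 0, 0, ...]
    #     r=1: mark 1, clear 0  -> [0, 1, 0, 0, ...]
    #     r=2: mark 2, clear 1  -> [0, 0, 1, 0, ...]
    #     r=3: mark 3, clear 0  -> [0, 0, 1, 1, ...]
    #
    # leaving exactly the childless revisions 2 and 3 set.
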
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

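    # The early returns above are cheap invariants: nullrev precedes
    # everything; every rev is its own ancestor; and because revision
    # numbers are assigned in topological order, an ancestor can never have
    # a larger number than its descendant, so `a > b` settles the question
    # without touching the graph. Only the remaining case pays for a
    # reachability walk from `a` toward `b`.
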
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

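    # Usage sketch (hypothetical values): this is the machinery behind
    # prefix shortening such as the `shortest` template filter. A node
    # whose hex form starts with b'1f0e...' might yield b'1f0' if three
    # digits already distinguish it from every other node and from the
    # all-'f' wdir pseudo-identifier.
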
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

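    # Behavioral sketch for the method above (``rl`` and ``revs`` are
    # assumed names): up to buffer-vs-bytes wrapping, the batched read is
    # expected to be equivalent to the naive per-revision loop, just with
    # far fewer reads:
    #
    #   assert [bytes(c) for c in rl._chunks(revs)] == [
    #       bytes(rl._chunk(r)) for r in revs
    #   ]
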
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

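    # A minimal sketch of walking a delta chain by hand with the method
    # above (``rl`` and ``rev`` are assumed names); ``_deltachain(rev)``
    # computes roughly the same sequence, base first:
    #
    #   chain = []
    #   while rev != nullrev:
    #       chain.append(rev)
    #       rev = rl.deltaparent(rev)
    #   chain.reverse()
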
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

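    # A small worked example for the two methods above: in a sparse revlog,
    # a delta chain may look like ``full snapshot -> intermediate snapshot
    # -> delta -> delta``. ``issnapshot()`` is True for the two snapshots
    # only, and ``snapshotdepth()`` is 0 for the full snapshot and 1 for
    # the intermediate one (one snapshot sits before it in the chain).
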
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

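    # Round-trip sketch for ``revdiff`` (``rl``, ``rev1`` and ``rev2`` are
    # assumed names): the binary delta patches one raw text into the other,
    # using the same ``mdiff`` helpers as ``_rawtext()``:
    #
    #   delta = rl.revdiff(rev1, rev2)
    #   assert mdiff.patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)
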
    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

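    # The default implementation hashes the revision text behind its two
    # parents, smaller node first. A standalone sketch of that convention
    # using only hashlib (equivalent in spirit to
    # ``storageutil.hashrevisionsha1``; ``example_node`` is a hypothetical
    # name):
    #
    #   import hashlib
    #
    #   def example_node(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
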
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

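    # Usage sketch (``rl``, ``text``, ``node`` and ``rev`` are assumed
    # names): callers that can tolerate censored revisions catch the
    # dedicated error instead of the generic integrity failure:
    #
    #   try:
    #       rl.checkhash(text, node, rev=rev)
    #   except error.CensoredNodeError:
    #       pass  # censored content, hash mismatch is expected
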
    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

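    # Concrete examples of the mapping implemented above:
    #
    #   radix b'data/foo/bar' -> b'data-s/foo/bar.i'
    #   radix b'meta/dir'     -> b'meta-s/dir.i'
    #   radix b'00changelog'  -> b'00changelog.i.s'
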
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

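    # Usage sketch (``rl`` is an assumed name for an open revlog): batching
    # several reads under one ``reading()`` context reuses the same file
    # handles instead of reopening the files for every access:
    #
    #   with rl.reading():
    #       texts = [rl.revision(r) for r in rl]
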
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

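    # Usage sketch (``rl``, ``tr`` and ``revisions`` are assumed names for
    # an open revlog, an active transaction and an iterable of revision
    # data): the context is reentrant, so nested writers simply reuse the
    # handles already opened by the outermost ``_writing`` block:
    #
    #   with rl._writing(tr):
    #       for text, p1, p2, linkrev in revisions:
    #           rl.addrevision(text, tr, linkrev, p1, p2)
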
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

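    # Behavioral sketch (assumed names as in the previous example): adding
    # the same (text, p1, p2) twice is idempotent; thanks to the
    # ``index.get_rev(node)`` early return above, the second call hands
    # back the existing revision number instead of appending a duplicate:
    #
    #   r1 = rl.addrevision(text, tr, linkrev, p1, p2)
    #   r2 = rl.addrevision(text, tr, linkrev, p1, p2)
    #   assert r1 == r2
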
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

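    # A sketch of the (header, data) convention above, assuming a
    # zlib-backed compressor (``rl`` is an assumed name):
    #
    #   h, d = rl.compress(b'some fairly repetitive text ' * 64)
    #   # typically h == b'' and d starts with b'x': the zlib magic byte
    #   # doubles as the revlog chunk header
    #   h, d = rl.compress(b'!')
    #   # too small to be worth compressing: h == b'u', d is the literal data
    #
    # ``decompress()`` below dispatches on that leading byte.
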
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

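        # Net effect of the branches above (as a reading aid): ``rank``
        # appears to be the size of the revision's ancestor set, the
        # revision itself included. For a root that is 1; for a linear
        # descendant, one more than its parent's rank; for a merge, the
        # size of the union of both parents' ancestor sets plus one, which
        # the fallback computes as ``fast_rank(pmax)`` plus the ancestors
        # of ``pmin`` that are missing from ``pmax``'s side.
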
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

2743 def _get_data_offset(self, prev):
2744 def _get_data_offset(self, prev):
2744 """Returns the current offset in the (in-transaction) data file.
2745 """Returns the current offset in the (in-transaction) data file.
2745 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2746 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2746 file to store that information: since sidedata can be rewritten to the
2747 file to store that information: since sidedata can be rewritten to the
2747 end of the data file within a transaction, you can have cases where, for
2748 end of the data file within a transaction, you can have cases where, for
2748 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2749 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2749 to `n - 1`'s sidedata being written after `n`'s data.
2750 to `n - 1`'s sidedata being written after `n`'s data.
2750
2751
2751 TODO cache this in a docket file before getting out of experimental."""
2752 TODO cache this in a docket file before getting out of experimental."""
2752 if self._docket is None:
2753 if self._docket is None:
2753 return self.end(prev)
2754 return self.end(prev)
2754 else:
2755 else:
2755 return self._docket.data_end
2756 return self._docket.data_end
2756
2757
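    # Illustrative sketch (not from the original source; ``rl`` is an assumed
    # revlog instance): for v0/v1 revlogs the next write offset is simply the
    # end of the last revision, while a v2 revlog must consult its docket:
    #
    #   offset = rl._get_data_offset(len(rl) - 1)
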
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

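        # Each item of ``deltas`` is expected to unpack as the 8-tuple
        # consumed by the loop below:
        #
        #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        #
        # where ``delta`` is a binary patch against ``deltabase``'s raw text.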
        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

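    # Illustrative sketch (assumed names, for the example only): a caller
    # preparing to strip everything linked to changelog revision ``minlink``:
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   # ``rev`` is where truncation starts; ``broken`` holds revisions
    #   # whose linkrevs the strip would leave dangling.
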
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

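    # Illustrative sketch (hypothetical caller; ``repo`` and the transaction
    # description are assumptions): strip is typically driven by the repair
    # machinery, roughly:
    #
    #   with repo.transaction(b'strip') as tr:
    #       rl.strip(minlink, tr)  # truncates index, data and sidedata files
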
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

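    # Illustrative note: nonzero values are corruption hints, e.g.
    #
    #   dd, di = rl.checksize()  # ``rl`` is an assumed revlog instance
    #   # dd > 0: the data file has bytes past the last indexed revision
    #   # di != 0: the index file ends in a partial entry
    #
    # verifyintegrity() below reports exactly these two conditions.
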
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When it is None (the default), the destination revlog's
        current setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

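    # Illustrative sketch (hypothetical names; ``tr``, ``src`` and ``dest``
    # are assumptions): forcing every delta to be recomputed while copying,
    # per the policy table in the docstring above:
    #
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
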
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()     | L1     | L1     | L1    | L1
            # size()        | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext)  | L2     | L2     | L2    | L2
            # len(text)     | L2     | L2     | L2    | L3
            # len(read())   | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

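    # Illustrative note: only the requested keys appear in the result, e.g.
    # (values made up for the example)
    #
    #   rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # -> {b'revisionscount': 42, b'trackedsize': 123456}
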
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
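
    # Illustrative note: ``helpers`` is expected to be the 3-tuple described
    # by `revlogutil.sidedata.get_sidedata_helpers` (repository, computers,
    # removers); the early return above fires when both the computer and
    # remover slots are empty.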