upstream/mercurial-mirror Commit - r51918:9011c38b

1

# revlog.py - storage back-end for mercurial

1

# revlog.py - storage back-end for mercurial

2

# coding: utf8

2

# coding: utf8

3

#

3

#

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

"""Storage back-end for Mercurial.

9

"""Storage back-end for Mercurial.

10

11

This provides efficient delta storage with O(1) retrieve and append

11

This provides efficient delta storage with O(1) retrieve and append

12

and O(changes) merge between branches.

12

and O(changes) merge between branches.

13

"""

13

"""

14

15

16

import binascii

16

import binascii

17

import collections

17

import collections

18

import contextlib

18

import contextlib

19

import io

19

import io

20

import os

20

import os

21

import struct

21

import struct

22

import weakref

22

import weakref

23

import zlib

23

import zlib

24

25

# import stuff from node for others to import from revlog

25

# import stuff from node for others to import from revlog

26

from .node import (

26

from .node import (

27

bin,

27

bin,

28

hex,

28

hex,

29

nullrev,

29

nullrev,

30

sha1nodeconstants,

30

sha1nodeconstants,

31

short,

31

short,

32

wdirrev,

32

wdirrev,

33

)

33

)

34

from .i18n import _

34

from .i18n import _

35

from .revlogutils.constants import (

35

from .revlogutils.constants import (

36

ALL_KINDS,

36

ALL_KINDS,

37

CHANGELOGV2,

37

CHANGELOGV2,

38

COMP_MODE_DEFAULT,

38

COMP_MODE_DEFAULT,

39

COMP_MODE_INLINE,

39

COMP_MODE_INLINE,

40

COMP_MODE_PLAIN,

40

COMP_MODE_PLAIN,

41

DELTA_BASE_REUSE_NO,

41

DELTA_BASE_REUSE_NO,

42

DELTA_BASE_REUSE_TRY,

42

DELTA_BASE_REUSE_TRY,

43

ENTRY_RANK,

43

ENTRY_RANK,

44

FEATURES_BY_VERSION,

44

FEATURES_BY_VERSION,

45

FLAG_GENERALDELTA,

45

FLAG_GENERALDELTA,

46

FLAG_INLINE_DATA,

46

FLAG_INLINE_DATA,

47

INDEX_HEADER,

47

INDEX_HEADER,

48

KIND_CHANGELOG,

48

KIND_CHANGELOG,

49

KIND_FILELOG,

49

KIND_FILELOG,

50

RANK_UNKNOWN,

50

RANK_UNKNOWN,

51

REVLOGV0,

51

REVLOGV0,

52

REVLOGV1,

52

REVLOGV1,

53

REVLOGV1_FLAGS,

53

REVLOGV1_FLAGS,

54

REVLOGV2,

54

REVLOGV2,

55

REVLOGV2_FLAGS,

55

REVLOGV2_FLAGS,

56

REVLOG_DEFAULT_FLAGS,

56

REVLOG_DEFAULT_FLAGS,

57

REVLOG_DEFAULT_FORMAT,

57

REVLOG_DEFAULT_FORMAT,

58

REVLOG_DEFAULT_VERSION,

58

REVLOG_DEFAULT_VERSION,

59

SUPPORTED_FLAGS,

59

SUPPORTED_FLAGS,

60

)

60

)

61

from .revlogutils.flagutil import (

61

from .revlogutils.flagutil import (

62

REVIDX_DEFAULT_FLAGS,

62

REVIDX_DEFAULT_FLAGS,

63

REVIDX_ELLIPSIS,

63

REVIDX_ELLIPSIS,

64

REVIDX_EXTSTORED,

64

REVIDX_EXTSTORED,

65

REVIDX_FLAGS_ORDER,

65

REVIDX_FLAGS_ORDER,

66

REVIDX_HASCOPIESINFO,

66

REVIDX_HASCOPIESINFO,

67

REVIDX_ISCENSORED,

67

REVIDX_ISCENSORED,

68

REVIDX_RAWTEXT_CHANGING_FLAGS,

68

REVIDX_RAWTEXT_CHANGING_FLAGS,

69

)

69

)

70

from .thirdparty import attr

70

from .thirdparty import attr

71

from . import (

71

from . import (

72

ancestor,

72

ancestor,

73

dagop,

73

dagop,

74

error,

74

error,

75

mdiff,

75

mdiff,

76

policy,

76

policy,

77

pycompat,

77

pycompat,

78

revlogutils,

78

revlogutils,

79

templatefilters,

79

templatefilters,

80

util,

80

util,

81

)

81

)

82

from .interfaces import (

82

from .interfaces import (

83

repository,

83

repository,

84

util as interfaceutil,

84

util as interfaceutil,

85

)

85

)

86

from .revlogutils import (

86

from .revlogutils import (

87

deltas as deltautil,

87

deltas as deltautil,

88

docket as docketutil,

88

docket as docketutil,

89

flagutil,

89

flagutil,

90

nodemap as nodemaputil,

90

nodemap as nodemaputil,

91

randomaccessfile,

91

randomaccessfile,

92

revlogv0,

92

revlogv0,

93

rewrite,

93

rewrite,

94

sidedata as sidedatautil,

94

sidedata as sidedatautil,

95

)

95

)

96

from .utils import (

96

from .utils import (

97

storageutil,

97

storageutil,

98

stringutil,

98

stringutil,

99

)

99

)

100

101

# blanked usage of all the name to prevent pyflakes constraints

101

# blanked usage of all the name to prevent pyflakes constraints

102

# We need these name available in the module for extensions.

102

# We need these name available in the module for extensions.

103

104

REVLOGV0

104

REVLOGV0

105

REVLOGV1

105

REVLOGV1

106

REVLOGV2

106

REVLOGV2

107

CHANGELOGV2

107

CHANGELOGV2

108

FLAG_INLINE_DATA

108

FLAG_INLINE_DATA

109

FLAG_GENERALDELTA

109

FLAG_GENERALDELTA

110

REVLOG_DEFAULT_FLAGS

110

REVLOG_DEFAULT_FLAGS

111

REVLOG_DEFAULT_FORMAT

111

REVLOG_DEFAULT_FORMAT

112

REVLOG_DEFAULT_VERSION

112

REVLOG_DEFAULT_VERSION

113

REVLOGV1_FLAGS

113

REVLOGV1_FLAGS

114

REVLOGV2_FLAGS

114

REVLOGV2_FLAGS

115

REVIDX_ISCENSORED

115

REVIDX_ISCENSORED

116

REVIDX_ELLIPSIS

116

REVIDX_ELLIPSIS

117

REVIDX_HASCOPIESINFO

117

REVIDX_HASCOPIESINFO

118

REVIDX_EXTSTORED

118

REVIDX_EXTSTORED

119

REVIDX_DEFAULT_FLAGS

119

REVIDX_DEFAULT_FLAGS

120

REVIDX_FLAGS_ORDER

120

REVIDX_FLAGS_ORDER

121

REVIDX_RAWTEXT_CHANGING_FLAGS

121

REVIDX_RAWTEXT_CHANGING_FLAGS

122

123

parsers = policy.importmod('parsers')

123

parsers = policy.importmod('parsers')

124

rustancestor = policy.importrust('ancestor')

124

rustancestor = policy.importrust('ancestor')

125

rustdagop = policy.importrust('dagop')

125

rustdagop = policy.importrust('dagop')

126

rustrevlog = policy.importrust('revlog')

126

rustrevlog = policy.importrust('revlog')

127

128

# Aliased for performance.

128

# Aliased for performance.

129

_zlibdecompress = zlib.decompress

129

_zlibdecompress = zlib.decompress

130

131

# max size of inline data embedded into a revlog

131

# max size of inline data embedded into a revlog

132

_maxinline = 131072

132

_maxinline = 131072

133

134

# Flag processors for REVIDX_ELLIPSIS.

134

# Flag processors for REVIDX_ELLIPSIS.

135

def ellipsisreadprocessor(rl, text):
    """Read-side flag processor for REVIDX_ELLIPSIS.

    ``rl`` is ignored and ``text`` is handed back untouched, paired with
    ``False``.
    """
    return (text, False)

137

138

139

def ellipsiswriteprocessor(rl, text):
    """Write-side flag processor for REVIDX_ELLIPSIS.

    ``rl`` is ignored and ``text`` is handed back untouched, paired with
    ``False``.
    """
    return (text, False)

141

142

143

def ellipsisrawprocessor(rl, text):
    """Raw flag processor for REVIDX_ELLIPSIS.

    Both arguments are ignored; the answer is always ``False``.
    """
    return False

145

146

147

# (read, write, raw) processor triple; ``revlog._init_opts`` registers it under
# REVIDX_ELLIPSIS when the ``enableellipsis`` opener option is set.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

152

153

154

def _verify_revision(rl, skipflags, state, node):

154

def _verify_revision(rl, skipflags, state, node):

155

"""Verify the integrity of the given revlog ``node`` while providing a hook

155

"""Verify the integrity of the given revlog ``node`` while providing a hook

156

point for extensions to influence the operation."""

156

point for extensions to influence the operation."""

157

if skipflags:

157

if skipflags:

158

state[b'skipread'].add(node)

158

state[b'skipread'].add(node)

159

else:

159

else:

160

# Side-effect: read content and verify hash.

160

# Side-effect: read content and verify hash.

161

rl.revision(node)

161

rl.revision(node)

162

163

164

# True if a fast implementation of the persistent nodemap is available.
#
# We also consider the "pure" python implementation to be "fast", because
# people using pure don't really have performance considerations (and have a
# wheelbarrow of other sources of slowness).
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)

172

173

174

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    """Attrs record declared as an implementer of ``repository.irevisiondelta``.

    All fields are required except ``linknode``, which defaults to ``None``.
    The meaning of each field is defined by the interface, which is not
    visible in this module.
    """

    # NOTE: the declaration order below is also the positional order of the
    # generated ``__init__``; do not reorder.
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)

188

189

190

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    """Immutable attrs record declared as an implementer of
    ``repository.iverifyproblem``.

    Every field is optional and defaults to ``None``.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

196

197

198

def parse_index_v1(data, inline):
    """Parse index ``data`` with ``parsers.parse_index2`` (default format).

    Returns the ``(index, cache)`` pair produced by the parser.
    """
    # delegate to the implementation provided by the ``parsers`` module
    parsed = parsers.parse_index2(data, inline)
    index, cache = parsed
    return index, cache

202

203

204

def parse_index_v2(data, inline):
    """Parse index ``data`` with ``parsers.parse_index2`` in REVLOGV2 format.

    Returns the ``(index, cache)`` pair produced by the parser.
    """
    # delegate to the implementation provided by the ``parsers`` module
    parsed = parsers.parse_index2(data, inline, format=REVLOGV2)
    index, cache = parsed
    return index, cache

208

209

210

def parse_index_cl_v2(data, inline):
    """Parse index ``data`` with ``parsers.parse_index2`` in CHANGELOGV2 format.

    Returns the ``(index, cache)`` pair produced by the parser.
    """
    # delegate to the implementation provided by the ``parsers`` module
    parsed = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    index, cache = parsed
    return index, cache

214

215

216

if not hasattr(parsers, 'parse_index_devel_nodemap'):
    # The loaded ``parsers`` module has no devel nodemap parser.
    parse_index_v1_nodemap = None
else:

    def parse_index_v1_nodemap(data, inline):
        """Parse index ``data`` with ``parsers.parse_index_devel_nodemap``.

        Returns the ``(index, cache)`` pair produced by the parser.
        """
        parsed = parsers.parse_index_devel_nodemap(data, inline)
        index, cache = parsed
        return index, cache

225

226

227

def parse_index_v1_mixed(data, inline):
    """Parse index ``data`` as v1 and wrap the index in ``rustrevlog.MixedIndex``.

    Returns ``(mixed_index, cache)`` where ``cache`` is the one returned by
    ``parse_index_v1``.
    """
    base_index, cache = parse_index_v1(data, inline)
    mixed_index = rustrevlog.MixedIndex(base_index)
    return mixed_index, cache

230

231

232

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

# Translated message template; placeholders, in order: one %s followed by
# three %d (expected byte count, offset, data size).
FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

# Every byte accepted as a hexadecimal digit, lower and upper case.
hexdigits = b'0123456789abcdefABCDEF'

242

243

244

class revlog:

244

class revlog:

245

"""

245

"""

246

the underlying revision storage object

246

the underlying revision storage object

247

248

A revlog consists of two parts, an index and the revision data.

248

A revlog consists of two parts, an index and the revision data.

249

250

The index is a file with a fixed record size containing

250

The index is a file with a fixed record size containing

251

information on each revision, including its nodeid (hash), the

251

information on each revision, including its nodeid (hash), the

252

nodeids of its parents, the position and offset of its data within

252

nodeids of its parents, the position and offset of its data within

253

the data file, and the revision it's based on. Finally, each entry

253

the data file, and the revision it's based on. Finally, each entry

254

contains a linkrev entry that can serve as a pointer to external

254

contains a linkrev entry that can serve as a pointer to external

255

data.

255

data.

256

257

The revision data itself is a linear collection of data chunks.

257

The revision data itself is a linear collection of data chunks.

258

Each chunk represents a revision and is usually represented as a

258

Each chunk represents a revision and is usually represented as a

259

delta against the previous chunk. To bound lookup time, runs of

259

delta against the previous chunk. To bound lookup time, runs of

260

deltas are limited to about 2 times the length of the original

260

deltas are limited to about 2 times the length of the original

261

version data. This makes retrieval of a version proportional to

261

version data. This makes retrieval of a version proportional to

262

its size, or O(1) relative to the number of revisions.

262

its size, or O(1) relative to the number of revisions.

263

264

Both pieces of the revlog are written to in an append-only

264

Both pieces of the revlog are written to in an append-only

265

fashion, which means we never need to rewrite a file to insert or

265

fashion, which means we never need to rewrite a file to insert or

266

remove data, and can use some simple techniques to avoid the need

266

remove data, and can use some simple techniques to avoid the need

267

for locking while reading.

267

for locking while reading.

268

269

If checkambig, indexfile is opened with checkambig=True at

269

If checkambig, indexfile is opened with checkambig=True at

270

writing, to avoid file stat ambiguity.

270

writing, to avoid file stat ambiguity.

271

272

If mmaplargeindex is True, and an mmapindexthreshold is set, the

272

If mmaplargeindex is True, and an mmapindexthreshold is set, the

273

index will be mmapped rather than read if it is larger than the

273

index will be mmapped rather than read if it is larger than the

274

configured threshold.

274

configured threshold.

275

276

If censorable is True, the revlog can have censored revisions.

276

If censorable is True, the revlog can have censored revisions.

277

278

If `upperboundcomp` is not None, this is the expected maximal gain from

278

If `upperboundcomp` is not None, this is the expected maximal gain from

279

compression for the data content.

279

compression for the data content.

280

281

`concurrencychecker` is an optional function that receives 3 arguments: a

281

`concurrencychecker` is an optional function that receives 3 arguments: a

282

file handle, a filename, and an expected position. It should check whether

282

file handle, a filename, and an expected position. It should check whether

283

the current position in the file handle is valid, and log/warn/fail (by

283

the current position in the file handle is valid, and log/warn/fail (by

284

raising).

284

raising).

285

286

See mercurial/revlogutils/constants.py for details about the content of an

286

See mercurial/revlogutils/constants.py for details about the content of an

287

index entry.

287

index entry.

288

"""

288

"""

289

290

_flagserrorclass = error.RevlogError

290

_flagserrorclass = error.RevlogError

291

292

@staticmethod
def is_inline_index(header_bytes):
    """Tell whether a revlog is inline, given the first bytes of its index.

    The header is unpacked with ``INDEX_HEADER``; its low 16 bits select the
    format version and the remaining bits are the format flags handed to
    that version's ``inline`` feature check.
    """
    raw_header = INDEX_HEADER.unpack(header_bytes)[0]

    version = raw_header & 0xFFFF
    flags = raw_header & ~0xFFFF

    inline_check = FEATURES_BY_VERSION[version][b'inline']
    return inline_check(flags)

302

303

def __init__(
    self,
    opener,
    target,
    radix,
    postfix=None,  # only exist for `tmpcensored` now
    checkambig=False,
    mmaplargeindex=False,
    censorable=False,
    upperboundcomp=None,
    persistentnodemap=False,
    concurrencychecker=None,
    trypending=False,
    try_split=False,
    canonical_parent_order=True,
):
    """
    create a revlog object

    opener is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.

    `target`: a (KIND, ID) tuple that identifies the content stored in
    this revlog. It helps the rest of the code to understand what the revlog
    is about without having to resort to heuristics and index filename
    analysis. Note that this must reliably be set by normal code, but
    that test, debug, or performance measurement code might not set this to
    an accurate value.

    The constructor only records its arguments and default settings, then
    calls ``_loadindex()``.
    """
    self.upperboundcomp = upperboundcomp

    self.radix = radix

    # File names start out unset.
    self._docket_file = None
    self._indexfile = None
    self._datafile = None
    self._sidedatafile = None
    self._nodemap_file = None
    self.postfix = postfix
    self._trypending = trypending
    self._try_split = try_split
    self.opener = opener
    if persistentnodemap:
        self._nodemap_file = nodemaputil.get_nodemap_file(self)

    assert target[0] in ALL_KINDS
    assert len(target) == 2
    self.target = target
    # When True, indexfile is opened with checkambig=True at writing, to
    # avoid file stat ambiguity.
    self._checkambig = checkambig
    self._mmaplargeindex = mmaplargeindex
    self._censorable = censorable
    # 3-tuple of (node, rev, text) for a raw revision.
    self._revisioncache = None
    # Maps rev to chain base rev.
    self._chainbasecache = util.lrucachedict(100)
    # 2-tuple of (offset, data) of raw data from the revlog at an offset.
    self._chunkcache = (0, b'')
    # How much data to read and cache into the raw revlog data cache.
    # (_init_opts() may override this and several of the defaults below
    # from the opener options.)
    self._chunkcachesize = 65536
    self._maxchainlen = None
    self._deltabothparents = True
    self._candidate_group_chunk_size = 0
    self._debug_delta = False
    self.index = None
    self._docket = None
    self._nodemap_docket = None
    # Mapping of partial identifiers to full nodes.
    self._pcache = {}
    # Default compression engine name and its options.
    self._compengine = b'zlib'
    self._compengineopts = {}
    self._maxdeltachainspan = -1
    self._withsparseread = False
    self._sparserevlog = False
    self.hassidedata = False
    self._srdensitythreshold = 0.50
    self._srmingapsize = 262144

    # other optional features

    # might remove rank configuration once the computation has no impact
    self._compute_rank = False

    # Make copy of flag processors so each revlog instance can support
    # custom flags.
    self._flagprocessors = dict(flagutil.flagprocessors)

    # 3-tuple of file handles being used for active writing.
    self._writinghandles = None
    # prevent nesting of addgroup
    self._adding_group = None

    # NOTE: the two attributes assigned after this call are not yet set
    # while _loadindex() runs.
    self._loadindex()

    self._concurrencychecker = concurrencychecker

    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    self.canonical_parent_order = canonical_parent_order

407

408

def _init_opts(self):
    """process options (from above/config) to setup associated default revlog mode

    These values might be affected when actually reading on disk information.

    Options are read from ``self.opener.options`` and stored on ``self``.
    The following values are returned, in this order, for use in
    _loadindex():

    * new_header:
        version header to use if we need to create a new revlog

    * mmapindexthreshold:
        minimal index size for start to use mmap (None unless
        ``self._mmaplargeindex`` is set and the option is present)

    * force_nodemap:
        force the usage of a "development" version of the nodemap code

    Raises error.RevlogError if the chunk cache size is not a strictly
    positive power of 2.
    """
    mmapindexthreshold = None
    opts = self.opener.options

    # Pick the header for newly created revlogs; the first matching
    # option wins.
    if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
        new_header = CHANGELOGV2
        self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
    elif b'revlogv2' in opts:
        new_header = REVLOGV2
    elif b'revlogv1' in opts:
        new_header = REVLOGV1 | FLAG_INLINE_DATA
        if b'generaldelta' in opts:
            new_header |= FLAG_GENERALDELTA
    elif b'revlogv0' in opts:
        new_header = REVLOGV0
    else:
        new_header = REVLOG_DEFAULT_VERSION

    if b'chunkcachesize' in opts:
        self._chunkcachesize = opts[b'chunkcachesize']
    if b'maxchainlen' in opts:
        self._maxchainlen = opts[b'maxchainlen']
    if b'deltabothparents' in opts:
        self._deltabothparents = opts[b'deltabothparents']
    dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
    if dps_cgds:
        self._candidate_group_chunk_size = dps_cgds
    self._lazydelta = bool(opts.get(b'lazydelta', True))
    # lazydeltabase is only honoured when lazydelta itself is enabled
    self._lazydeltabase = False
    if self._lazydelta:
        self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
    if b'debug-delta' in opts:
        self._debug_delta = opts[b'debug-delta']
    if b'compengine' in opts:
        self._compengine = opts[b'compengine']
    if b'zlib.level' in opts:
        self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
    if b'zstd.level' in opts:
        self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
    if b'maxdeltachainspan' in opts:
        self._maxdeltachainspan = opts[b'maxdeltachainspan']
    if self._mmaplargeindex and b'mmapindexthreshold' in opts:
        mmapindexthreshold = opts[b'mmapindexthreshold']
    self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
    withsparseread = bool(opts.get(b'with-sparse-read', False))
    # sparse-revlog forces sparse-read
    self._withsparseread = self._sparserevlog or withsparseread
    if b'sparse-read-density-threshold' in opts:
        self._srdensitythreshold = opts[b'sparse-read-density-threshold']
    if b'sparse-read-min-gap-size' in opts:
        self._srmingapsize = opts[b'sparse-read-min-gap-size']
    if opts.get(b'enableellipsis'):
        self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

    # Extra flag processors from the options are registered here
    # regardless of the header chosen above.
    for flag, processor in opts.get(b'flagprocessors', {}).items():
        flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

    if self._chunkcachesize <= 0:
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not greater than 0')
            % self._chunkcachesize
        )
    elif self._chunkcachesize & (self._chunkcachesize - 1):
        # n & (n - 1) is non-zero exactly when n has more than one bit set
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not a power of 2')
            % self._chunkcachesize
        )
    force_nodemap = opts.get(b'devel-force-nodemap', False)
    return new_header, mmapindexthreshold, force_nodemap

493

494

def _get_data(self, filepath, mmap_threshold, size=None):
    """return a file content with or without mmap

    `filepath` is opened through `self.opener`. When `mmap_threshold` is
    not None and the file is at least that many bytes long, the content is
    returned as a buffer over a memory map; otherwise it is read normally.
    If `size` is given, at most `size` bytes are returned.

    If the file is missing return the empty string"""
    try:
        with self.opener(filepath) as fp:
            if mmap_threshold is not None:
                file_size = self.opener.fstat(fp).st_size
                if file_size >= mmap_threshold:
                    if size is not None:
                        # avoid potential mmap crash: never map more than
                        # the file actually holds
                        size = min(file_size, size)
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    if size is None:
                        return util.buffer(util.mmapread(fp))
                    else:
                        return util.buffer(util.mmapread(fp, size))
            if size is None:
                return fp.read()
            else:
                return fp.read(size)
    except FileNotFoundError:
        return b''

518

519

def get_streams(self, max_linkrev, force_inline=False):
    """return a list of streams that represent this revlog

    This is used by stream-clone to do bytes to bytes copies of a repository.

    This streams data for all revisions that refer to a changelog revision up
    to `max_linkrev`.

    If `force_inline` is set, it enforces that the stream will represent an
    inline revlog.

    It returns a list of three-tuples:

        [
            (filename, bytes_stream, stream_size),
            …
        ]

    Each `bytes_stream` is a generator that has already been advanced past
    its initial `None`, so the underlying file is open by the time this
    method returns. An empty list is returned when no revision qualifies.
    """
    n = len(self)
    index = self.index
    while n > 0:
        linkrev = index[n - 1][4]
        if linkrev < max_linkrev:
            break
        # note: this loop will rarely go through multiple iterations, since
        # it only traverses commits created during the current streaming
        # pull operation.
        #
        # If this become a problem, using a binary search should cap the
        # runtime of this.
        n = n - 1
    if n == 0:
        # no data to send
        return []
    index_size = n * index.entry_size
    data_size = self.end(n - 1)

    # XXX we might have been split (or stripped) since the object
    # initialization, We need to close this race too, but having a way to
    # pre-open the file we feed to the revlog and never closing them before
    # we are done streaming.

    if self._inline:

        def get_stream():
            with self._indexfp() as fp:
                # first yield only signals that the file is open
                yield None
                size = index_size + data_size
                if size <= 65536:
                    yield fp.read(size)
                else:
                    yield from util.filechunkiter(fp, limit=size)

        inline_stream = get_stream()
        next(inline_stream)
        return [
            (self._indexfile, inline_stream, index_size + data_size),
        ]
    elif force_inline:

        def get_stream():
            with self.reading():
                yield None

                # interleave each index entry with its data segment
                for rev in range(n):
                    idx = self.index.entry_binary(rev)
                    if rev == 0 and self._docket is None:
                        # re-inject the inline flag
                        header = self._format_flags
                        header |= self._format_version
                        header |= FLAG_INLINE_DATA
                        header = self.index.pack_header(header)
                        idx = header + idx
                    yield idx
                    yield self._getsegmentforrevs(rev, rev)[1]

        inline_stream = get_stream()
        next(inline_stream)
        return [
            (self._indexfile, inline_stream, index_size + data_size),
        ]
    else:

        def get_index_stream():
            with self._indexfp() as fp:
                yield None
                if index_size <= 65536:
                    yield fp.read(index_size)
                else:
                    yield from util.filechunkiter(fp, limit=index_size)

        def get_data_stream():
            with self._datafp() as fp:
                yield None
                if data_size <= 65536:
                    yield fp.read(data_size)
                else:
                    yield from util.filechunkiter(fp, limit=data_size)

        index_stream = get_index_stream()
        next(index_stream)
        data_stream = get_data_stream()
        next(data_stream)
        return [
            (self._datafile, data_stream, data_size),
            (self._indexfile, index_stream, index_size),
        ]

625

626

def _loadindex(self, docket=None):
    """Locate, read and parse the index, then set up dependent state.

    The entry point file name is chosen from `self.postfix`, a pending
    (`.i.a`) file, a split index file, or the plain `<radix>.i`. When no
    `docket` is passed in, the header is read from that file (or taken
    from the options for an empty revlog) and validated against
    `SUPPORTED_FLAGS`. The index is then parsed with the parser matching
    the format version, and `self.index`, the segment files and the
    per-revlog caches are initialised.

    Raises `error.RevlogError` for an unknown version, unknown flags, a
    truncated docket-described index, or an index that fails to parse.
    """

    new_header, mmapindexthreshold, force_nodemap = self._init_opts()

    if self.postfix is not None:
        entry_point = b'%s.i.%s' % (self.radix, self.postfix)
    elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
        entry_point = b'%s.i.a' % self.radix
    elif self._try_split and self.opener.exists(self._split_index_file):
        entry_point = self._split_index_file
    else:
        entry_point = b'%s.i' % self.radix

    if docket is not None:
        self._docket = docket
        self._docket_file = entry_point
    else:
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        # high 16 bits carry the flags, low 16 bits the version
        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )

    if self._docket is not None:
        self._indexfile = self._docket.index_filepath()
        index_data = b''
        index_size = self._docket.index_end
        if index_size > 0:
            index_data = self._get_data(
                self._indexfile, mmapindexthreshold, size=index_size
            )
            if len(index_data) < index_size:
                msg = _(b'too few index data for %s: got %d, expected %d')
                msg %= (self.display_id, len(index_data), index_size)
                raise error.RevlogError(msg)

        self._inline = False
        # generaldelta implied by version 2 revlogs.
        self._generaldelta = True
        # the logic for persistent nodemap will be dealt with within the
        # main docket, so disable it for now.
        self._nodemap_file = None

    if self._docket is not None:
        self._datafile = self._docket.data_filepath()
        self._sidedatafile = self._docket.sidedata_filepath()
    elif self.postfix is None:
        self._datafile = b'%s.d' % self.radix
    else:
        self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

    self.nodeconstants = sha1nodeconstants
    self.nullid = self.nodeconstants.nullid

    # sparse-revlog can't be on without general-delta (issue6056)
    if not self._generaldelta:
        self._sparserevlog = False

    self._storedeltachains = True

    devel_nodemap = (
        self._nodemap_file
        and force_nodemap
        and parse_index_v1_nodemap is not None
    )

    use_rust_index = False
    if rustrevlog is not None:
        if self._nodemap_file is not None:
            use_rust_index = True
        else:
            use_rust_index = self.opener.options.get(b'rust.index')

    # pick the parser: version-specific ones win over the nodemap/rust
    # variants of the v1 parser
    self._parse_index = parse_index_v1
    if self._format_version == REVLOGV0:
        self._parse_index = revlogv0.parse_index_v0
    elif self._format_version == REVLOGV2:
        self._parse_index = parse_index_v2
    elif self._format_version == CHANGELOGV2:
        self._parse_index = parse_index_cl_v2
    elif devel_nodemap:
        self._parse_index = parse_index_v1_nodemap
    elif use_rust_index:
        self._parse_index = parse_index_v1_mixed
    try:
        d = self._parse_index(index_data, self._inline)
        index, chunkcache = d
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                docket = nodemap_data[0]
                if (
                    len(d[0]) > docket.tip_rev
                    and d[0][docket.tip_rev][7] == docket.tip_node
                ):
                    # no changelog tampering
                    self._nodemap_docket = docket
                    index.update_nodemap_data(*nodemap_data)
    except (ValueError, IndexError):
        raise error.RevlogError(
            _(b"index %s is corrupted") % self.display_id
        )
    self.index = index
    self._segmentfile = randomaccessfile.randomaccessfile(
        self.opener,
        (self._indexfile if self._inline else self._datafile),
        self._chunkcachesize,
        chunkcache,
    )
    self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
        self.opener,
        self._sidedatafile,
        self._chunkcachesize,
    )
    # revnum -> (chain-length, sum-delta-length)
    self._chaininfocache = util.lrucachedict(500)
    # revlog header -> revlog compressor
    self._decompressors = {}

782

783

def get_revlog(self):
    """simple function to mirror API of other not-really-revlog API

    Returns this object itself.
    """
    return self

786

787

@util.propertycache
def revlog_kind(self):
    """The first element of `self.target`."""
    return self.target[0]

790

791

@util.propertycache
def display_id(self):
    """The public facing "ID" of the revlog that we use in message"""
    if self.revlog_kind == KIND_FILELOG:
        # Reference the file without the "data/" prefix, so it is familiar
        # to the user.
        return self.target[1]
    else:
        return self.radix

800

801

def _get_decompressor(self, t):
    """Return the compressor object for the revlog header `t`.

    Results are cached in `self._decompressors`. On a cache miss the
    engine is looked up with `util.compengines.forrevlogheader(t)` and
    instantiated with `self._compengineopts`.

    Raises `error.RevlogError` when that lookup raises `KeyError`.
    """
    try:
        compressor = self._decompressors[t]
    except KeyError:
        try:
            engine = util.compengines.forrevlogheader(t)
            compressor = engine.revlogcompressor(self._compengineopts)
            self._decompressors[t] = compressor
        except KeyError:
            raise error.RevlogError(
                _(b'unknown compression type %s') % binascii.hexlify(t)
            )
    return compressor

814

815

@util.propertycache
def _compressor(self):
    """Compressor built from the engine named by `self._compengine`.

    The engine is taken from `util.compengines` and instantiated with
    `self._compengineopts`.
    """
    engine = util.compengines[self._compengine]
    return engine.revlogcompressor(self._compengineopts)

819

820

@util.propertycache
def _decompressor(self):
    """the default decompressor

    Returns None when the revlog has no docket; otherwise the `decompress`
    method of the compressor matching the docket's
    `default_compression_header`.
    """
    if self._docket is None:
        return None
    t = self._docket.default_compression_header
    c = self._get_decompressor(t)
    return c.decompress

828

829

def _indexfp(self):
    """file object for the revlog's index file

    The file is opened through `self.opener` in read mode.
    """
    return self.opener(self._indexfile, mode=b"r")

832

833

def __index_write_fp(self):
    """Open the index file for writing, positioned where new data goes.

    Without a docket the position is the end of the file; with a docket
    it is `self._docket.index_end`. If the file does not exist yet it is
    created (mode `w+`).
    """
    # You should not use this directly and use `_writing` instead
    try:
        f = self.opener(
            self._indexfile, mode=b"r+", checkambig=self._checkambig
        )
        if self._docket is None:
            f.seek(0, os.SEEK_END)
        else:
            f.seek(self._docket.index_end, os.SEEK_SET)
        return f
    except FileNotFoundError:
        return self.opener(
            self._indexfile, mode=b"w+", checkambig=self._checkambig
        )

848

849

def __index_new_fp(self):
    """Open a fresh index file for writing through an atomic temp file."""
    # You should not use this unless you are upgrading from inline revlog
    return self.opener(
        self._indexfile,
        mode=b"w",
        checkambig=self._checkambig,
        atomictemp=True,
    )

857

858

def _datafp(self, mode=b'r'):
    """file object for the revlog's data file

    `mode` is passed to `self.opener` unchanged and defaults to reading.
    """
    return self.opener(self._datafile, mode=mode)

861

862

@contextlib.contextmanager
def _sidedatareadfp(self):
    """file object suitable to read sidedata

    While `self._writinghandles` is set, its third element is yielded and
    left open; otherwise the sidedata file is opened for the duration of
    the context and closed on exit.
    """
    if self._writinghandles:
        yield self._writinghandles[2]
    else:
        with self.opener(self._sidedatafile) as fp:
            yield fp

870

871

def tiprev(self):
    """Return the highest revision number, or -1 for an empty index."""
    return len(self.index) - 1

873

874

def tip(self):
    """Return the node of the revision given by `tiprev()`."""
    return self.node(self.tiprev())

876

877

def __contains__(self, rev):
    """Return True when `rev` is a valid revision number of this revlog."""
    return 0 <= rev < len(self)

879

880

def __len__(self):
    """Return the number of entries in the index."""
    return len(self.index)

882

883

def __iter__(self):
    """Iterate over revision numbers from 0 to `len(self) - 1`."""
    return iter(range(len(self)))

885

886

def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)

    Delegates to `storageutil.iterrevs` with the current length.
    """
    return storageutil.iterrevs(len(self), start=start, stop=stop)

889

890

def hasnode(self, node):
    """Return True if `self.rev(node)` succeeds, False on `KeyError`."""
    try:
        self.rev(node)
        return True
    except KeyError:
        return False

896

897

def _candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # Disable delta if either rev requires a content-changing flag
    # processor (ex. LFS). This is because such flag processor can alter
    # the rawtext content that the delta will be based on, and two clients
    # could have a same revlog node with different flags (i.e. different
    # rawtext contents) and the delta could be incompatible.
    if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
        self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
    ):
        return False
    return True

909

910

def update_caches(self, transaction):
    """update on disk cache

    If a transaction is passed, the update may be delayed to transaction
    commit.

    Nothing is done when the revlog has no nodemap file."""
    if self._nodemap_file is not None:
        if transaction is None:
            nodemaputil.update_persistent_nodemap(self)
        else:
            nodemaputil.setup_persistent_nodemap(transaction, self)

920

921

def clearcaches(self):
    """Clear in-memory caches

    Resets the revision, chain-base, segment-file and `_pcache` caches and
    the index's own caches, then reloads the persisted nodemap data when
    the revlog is not inline, has a nodemap file, and the index supports
    `update_nodemap_data`.
    """
    self._revisioncache = None
    self._chainbasecache.clear()
    self._segmentfile.clear_cache()
    self._segmentfile_sidedata.clear_cache()
    self._pcache = {}
    self._nodemap_docket = None
    self.index.clearcaches()
    # The python code is the one responsible for validating the docket, we
    # end up having to refresh it here.
    use_nodemap = (
        not self._inline
        and self._nodemap_file is not None
        and hasattr(self.index, 'update_nodemap_data')
    )
    if use_nodemap:
        nodemap_data = nodemaputil.persisted_data(self)
        if nodemap_data is not None:
            self._nodemap_docket = nodemap_data[0]
            self.index.update_nodemap_data(*nodemap_data)

942

943

def rev(self, node):
    """return the revision number associated with a <nodeid>

    Raises `error.WdirUnsupported` when the lookup fails for a working
    directory node id, and `error.LookupError` for any other unknown node.
    A `TypeError` from the index is propagated unchanged.
    """
    try:
        return self.index.rev(node)
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.WdirUnsupported
        raise error.LookupError(node, self.display_id, _(b'no node'))

957

958

# Accessors for index entries.

958

# Accessors for index entries.

959

960

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

960

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

961

# are flags.

961

# are flags.

962

def start(self, rev):
    """Return the offset stored in the first index field of `rev`.

    The low 16 bits of that field hold the flags and are shifted out.
    """
    return int(self.index[rev][0] >> 16)

964

965

def sidedata_cut_off(self, rev):
    """Return the sidedata offset to use for `rev`.

    This is index field 8 of `rev` when it is non-zero. Otherwise it is
    `offset + length` (fields 8 and 9) of the closest revision at or
    before `rev` whose field 9 is non-zero, or 0 if there is none.
    """
    sd_cut_off = self.index[rev][8]
    if sd_cut_off != 0:
        return sd_cut_off
    # This is some annoying dance, because entries without sidedata
    # currently use 0 as their offset. (instead of previous-offset +
    # previous-size)
    #
    # We should reconsider this sidedata → 0 sidedata_offset policy.
    # In the meantime, we need this.
    while 0 <= rev:
        e = self.index[rev]
        if e[9] != 0:
            return e[8] + e[9]
        rev -= 1
    return 0

981

982

def flags(self, rev):
    """Return the flags of `rev`: the low 16 bits of its first index field."""
    return self.index[rev][0] & 0xFFFF

984

985

def length(self, rev):
    """Return index field 1 of `rev`.

    NOTE(review): presumably the stored (on-disk) chunk length; confirm
    against the index entry layout.
    """
    return self.index[rev][1]

987

988

def sidedata_length(self, rev):
    """Return index field 9 of `rev`, or 0 when `self.hassidedata` is false.

    NOTE(review): field 9 is presumably the stored sidedata length;
    confirm against the index entry layout.
    """
    if not self.hassidedata:
        return 0
    return self.index[rev][9]

992

993

def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision

    The value recorded in index field 2 is used when it is non-negative;
    otherwise the raw data is loaded and measured.
    """
    l = self.index[rev][2]
    if l >= 0:
        return l

    t = self.rawdata(rev)
    return len(t)

1001

1002

def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    # fast path: if no "read" flag processor could change the content,
    # size is rawsize. note: ELLIPSIS is known to not change the content.
    flags = self.flags(rev)
    if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
        return self.rawsize(rev)

    return len(self.revision(rev))

1011

1012

def fast_rank(self, rev):
    """Return the rank of a revision if already known, or None otherwise.

    The rank of a revision is the size of the sub-graph it defines as a
    head. Equivalently, the rank of a revision `r` is the size of the set
    `ancestors(r)`, `r` included.

    This method returns the rank retrieved from the revlog in constant
    time. It makes no attempt at computing unknown values for versions of
    the revlog which do not persist the rank.
    """
    rank = self.index[rev][ENTRY_RANK]
    if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
        return None
    if rev == nullrev:
        return 0  # convention
    return rank

1029

1030

def chainbase(self, rev):
    """Return the revision at the base of the chain ``rev`` belongs to.

    Index field 3 is followed from entry to entry until reaching an
    entry that references itself.  The result is memoized in
    ``self._chainbasecache``.
    """
    base = self._chainbasecache.get(rev)
    if base is not None:
        return base

    index = self.index
    iterrev = rev
    base = index[iterrev][3]
    while base != iterrev:
        iterrev = base
        base = index[iterrev][3]

    self._chainbasecache[rev] = base
    return base

1044

1045

def linkrev(self, rev):
    """Return the value stored in index field 4 for ``rev``."""
    return self.index[rev][4]

1047

1048

def parentrevs(self, rev):
    """Return the two parent revision numbers of ``rev`` as a tuple.

    The parents are read from index fields 5 and 6.  When
    ``self.canonical_parent_order`` is set and the first stored parent is
    ``nullrev``, the pair is swapped so the null parent comes last.

    Raises ``error.WdirUnsupported`` when ``rev`` is ``wdirrev`` and is
    not present in the index; any other ``IndexError`` is re-raised.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise

    if self.canonical_parent_order and entry[5] == nullrev:
        return entry[6], entry[5]
    else:
        return entry[5], entry[6]


# fast parentrevs(rev) where rev isn't filtered
_uncheckedparentrevs = parentrevs

1063

1064

def node(self, rev):
    """Return the node stored in index field 7 for ``rev``.

    Raises ``error.WdirUnsupported`` when ``rev`` is ``wdirrev`` and is
    not present in the index; any other ``IndexError`` is re-raised.
    """
    try:
        return self.index[rev][7]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise

1071

1072

# Derived from index values.

1072

# Derived from index values.

1073

1074

def end(self, rev):
    """Return ``self.start(rev) + self.length(rev)``."""
    return self.start(rev) + self.length(rev)

1076

1077

def parents(self, node):
    """Return the two parent nodes of ``node`` as a tuple.

    When ``self.canonical_parent_order`` is set and the first stored
    parent is null, the pair is swapped so the null parent comes last,
    mirroring what ``parentrevs`` does for revision numbers.
    """
    i = self.index
    d = i[self.rev(node)]
    # inline node() to avoid function call overhead
    #
    # d[5] and d[6] are parent *revision numbers* (they are used to index
    # ``i`` below), so the null test must compare against nullrev, not
    # against the nullid node.
    if self.canonical_parent_order and d[5] == nullrev:
        return i[d[6]][7], i[d[5]][7]
    else:
        return i[d[5]][7], i[d[6]][7]

1085

1086

def chainlen(self, rev):
    """Return the first item of ``self._chaininfo(rev)`` (the chain length)."""
    return self._chaininfo(rev)[0]

1088

1089

def _chaininfo(self, rev):
    """Return ``(chain length, cumulated compressed length)`` for ``rev``.

    The chain is walked from ``rev`` down to its base: through index
    field 3 when ``self._generaldelta`` is set, through the previous
    revision otherwise.  Field 1 of every entry visited is summed.  The
    walk stops early on an entry already present in
    ``self._chaininfocache``; the result for ``rev`` is stored there.
    """
    chaininfocache = self._chaininfocache
    if rev in chaininfocache:
        return chaininfocache[rev]
    index = self.index
    generaldelta = self._generaldelta
    iterrev = rev
    e = index[iterrev]
    clen = 0
    compresseddeltalen = 0
    while iterrev != e[3]:
        clen += 1
        compresseddeltalen += e[1]
        if generaldelta:
            iterrev = e[3]
        else:
            iterrev -= 1
        if iterrev in chaininfocache:
            t = chaininfocache[iterrev]
            clen += t[0]
            compresseddeltalen += t[1]
            break
        e = index[iterrev]
    else:
        # Add text length of base since decompressing that also takes
        # work. For cache hits the length is already included.
        compresseddeltalen += e[1]
    r = (clen, compresseddeltalen)
    chaininfocache[rev] = r
    return r

1119

1120

def _deltachain(self, rev, stoprev=None):
    """Obtain the delta chain for a revision.

    ``stoprev`` specifies a revision to stop at. If not specified, we
    stop at the base of the chain.

    Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
    revs in ascending order and ``stopped`` is a bool indicating whether
    ``stoprev`` was hit.
    """
    # Try C implementation.
    try:
        return self.index.deltachain(rev, stoprev, self._generaldelta)
    except AttributeError:
        # The index does not provide ``deltachain``: use the pure Python
        # walk below.
        pass

    chain = []

    # Alias to prevent attribute lookup in tight loop.
    index = self.index
    generaldelta = self._generaldelta

    iterrev = rev
    e = index[iterrev]
    while iterrev != e[3] and iterrev != stoprev:
        chain.append(iterrev)
        if generaldelta:
            iterrev = e[3]
        else:
            iterrev -= 1
        e = index[iterrev]

    if iterrev == stoprev:
        stopped = True
    else:
        # We reached the base of the chain: it is part of the result.
        chain.append(iterrev)
        stopped = False

    chain.reverse()
    return chain, stopped

1160

1161

def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse revision order.
    Does not generate revs lower than stoprev.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered
    revs = list(revs)
    checkrev = self.node
    for r in revs:
        checkrev(r)
    # and we're sure ancestors aren't filtered as well

    if rustancestor is not None and self.index.rust_ext_compat:
        lazyancestors = rustancestor.LazyAncestors
        arg = self.index
    else:
        lazyancestors = ancestor.lazyancestors
        arg = self._uncheckedparentrevs
    return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

1181

1182

def descendants(self, revs):
    """Return ``dagop.descendantrevs`` computed for ``revs`` on this revlog."""
    return dagop.descendantrevs(revs, self.revs, self.parentrevs)

1184

1185

def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset:
        """Set-like view over a lazy collection plus explicit additions."""

        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                if r not in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in self.parentrevs(r):
            if p not in has:
                visit.append(p)
    return has, [self.node(miss) for miss in sorted(missing)]

1248

1249

def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]

    if rustancestor is not None and self.index.rust_ext_compat:
        return rustancestor.MissingAncestors(self.index, common)
    return ancestor.incrementalmissingancestors(self.parentrevs, common)

1264

1265

def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullrev."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    inc = self.incrementalmissingrevs(common=common)
    return inc.missingancestors(heads)

1288

1289

def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=common)
    return [self.node(r) for r in inc.missingancestors(heads)]

1314

1315

def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min([self.rev(n) for n in roots])
    else:
        roots = [self.nullid]  # Everybody's a descendant of nullid
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return (
            [self.node(r) for r in self],
            [self.nullid],
            list(self.heads()),
        )
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # Set ancestors to None to signal that every node is an ancestor.
        ancestors = None
        # Set heads to an empty dictionary for later discovery of heads
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == self.nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendant of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors.add(n)  # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.update(
                        [p for p in self.parents(n) if p != self.nullid]
                    )
                elif n in heads:  # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots?  Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [self.nullid]
    # Transform our roots list into a set.
    descendants = set(roots)
    # Also, keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:  # Everybody is a descendant of nullid
            isdescendant = True
        elif n in descendants:
            # n is already a descendant
            isdescendant = True
            # This check only needs to be done here because all the roots
            # will start being marked as descendants before the loop.
            if n in roots:
                # If n was a root, check if it's a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendants, it's not a root.
                if (p[0] in descendants) or (p[1] in descendants):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendant if either of its parents are
            # descendants.  (We seeded the descendants set with the roots
            # up there, remember?)
            if (p[0] in descendants) or (p[1] in descendants):
                descendants.add(n)
                isdescendant = True
        if isdescendant and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendants and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots.
                # Mark this head as having been reached
                heads[n] = True
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = True
                # But, obviously its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    heads = [head for head, flag in heads.items() if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)

1474

1475

def headrevs(self, revs=None):
    """Return the head revisions, optionally restricted to ``revs``.

    When ``revs`` is None the answer comes from ``self.index.headrevs()``,
    falling back to ``self._headrevs()`` if that raises AttributeError.

    When ``revs`` is given the work is delegated to ``rustdagop.headrevs``
    (if the Rust module is available and the index reports
    ``rust_ext_compat``) and to ``dagop.headrevs`` otherwise.
    """
    if revs is not None:
        use_rust = rustdagop is not None and self.index.rust_ext_compat
        if use_rust:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    try:
        return self.index.headrevs()
    except AttributeError:
        # the index object offers no ``headrevs``; use the Python version
        return self._headrevs()

1484

1485

def computephases(self, roots):
    """Return ``self.index.computephasesmapsets(roots)`` unchanged."""
    index = self.index
    return index.computephasesmapsets(roots)

1487

1488

def _headrevs(self):

1488

def _headrevs(self):

1489

count = len(self)

1489

count = len(self)

1490

if not count:

1490

if not count:

1491

return [nullrev]

1491

return [nullrev]

1492

# we won't iter over filtered rev so nobody is a head at start

1492

# we won't iter over filtered rev so nobody is a head at start

1493

ishead = [0] * (count + 1)

1493

ishead = [0] * (count + 1)

1494

index = self.index

1494

index = self.index

1495

for r in self:

1495

for r in self:

1496

ishead[r] = 1 # I may be an head

1496

ishead[r] = 1 # I may be an head

1497

e = index[r]

1497

e = index[r]

1498

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

1498

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

1499

return [r for r, val in enumerate(ishead) if val]

1499

return [r for r, val in enumerate(ishead) if val]

1500

1501

def heads(self, start=None, stop=None):
    """Return the list of all nodes that have no children.

    If ``start`` is given, only heads that are descendants of ``start`` are
    returned. If ``stop`` is given, every revision listed in it is treated
    as if it had no children.
    """
    if start is None and stop is None:
        # unrestricted query: answer straight from ``headrevs``
        if len(self) == 0:
            return [self.nullid]
        return [self.node(headrev) for headrev in self.headrevs()]

    startrev = nullrev if start is None else self.rev(start)

    stoprevs = set()
    for stopnode in stop or []:
        stoprevs.add(self.rev(stopnode))

    subset = dagop.headrevssubset(
        self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
    )
    return [self.node(headrev) for headrev in subset]

1526

1527

def children(self, node):
    """Return the nodes of the revisions that have ``node`` as a parent.

    Only revisions numbered above ``node``'s own revision are scanned.
    """
    found = []
    target = self.rev(node)
    for rev in self.revs(start=target + 1):
        realparents = [p for p in self.parentrevs(rev) if p != nullrev]
        if not realparents:
            # no real parent at all: this is a child of the null revision
            if target == nullrev:
                found.append(self.node(rev))
            continue
        # one entry per matching parent slot
        found.extend(self.node(rev) for p in realparents if p == target)
    return found

1540

1541

def commonancestorsheads(self, a, b):
    """Return the nodes heading the common ancestors of nodes a and b.

    Both nodes are converted to revisions, the revision-level helper
    ``_commonancestorsheads`` does the work, and the result is mapped back
    to nodes.
    """
    reva = self.rev(a)
    revb = self.rev(b)
    headrevs = self._commonancestorsheads(reva, revb)
    return pycompat.maplist(self.node, headrevs)

1546

1547

def _commonancestorsheads(self, *revs):

1547

def _commonancestorsheads(self, *revs):

1548

"""calculate all the heads of the common ancestors of revs"""

1548

"""calculate all the heads of the common ancestors of revs"""

1549

try:

1549

try:

1550

ancs = self.index.commonancestorsheads(*revs)

1550

ancs = self.index.commonancestorsheads(*revs)

1551

except (AttributeError, OverflowError): # C implementation failed

1551

except (AttributeError, OverflowError): # C implementation failed

1552

ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)

1552

ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)

1553

return ancs

1553

return ancs

1554

1555

def isancestor(self, a, b):
    """Tell whether node ``a`` is an ancestor of node ``b``.

    A revision is considered an ancestor of itself. This is the node-based
    front end to ``isancestorrev``.
    """
    reva = self.rev(a)
    revb = self.rev(b)
    return self.isancestorrev(reva, revb)

1561

1562

def isancestorrev(self, a, b):
    """Tell whether revision ``a`` is an ancestor of revision ``b``.

    A revision is considered an ancestor of itself, and the null revision
    is reported as an ancestor of everything.

    The implementation of this is trivial but the use of
    reachableroots is not.
    """
    if a == nullrev or a == b:
        return True
    if a > b:
        # presumably an ancestor never has a higher revision number than
        # its descendant, so no graph walk is needed here
        return False
    reached = self.reachableroots(a, [b], [a], includepath=False)
    return bool(reached)

1576

1577

def reachableroots(self, minroot, heads, roots, includepath=False):
    """Return (heads(::(<roots> and <roots>::<heads>))).

    If ``includepath`` is True, return (<roots>::<heads>) instead.

    The index's ``reachableroots2`` is used when it exists; otherwise the
    pure Python ``dagop._reachablerootspure`` does the work. Note that the
    two take ``heads`` and ``roots`` in opposite order.
    """
    try:
        native = self.index.reachableroots2
        return native(minroot, heads, roots, includepath)
    except AttributeError:
        return dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )

1589

1590

def ancestor(self, a, b):
    """Return the "best" common ancestor node of nodes ``a`` and ``b``.

    ``self.nullid`` is returned when no common ancestor is found.
    """
    reva, revb = self.rev(a), self.rev(b)
    try:
        candidates = self.index.ancestors(reva, revb)
    except (AttributeError, OverflowError):
        candidates = ancestor.ancestors(self.parentrevs, reva, revb)

    if not candidates:
        return self.nullid
    # several candidates may tie: always pick the smallest node so the
    # winner is consistent
    return min(self.node(rev) for rev in candidates)

1602

1603

def _match(self, id):

1603

def _match(self, id):

1604

if isinstance(id, int):

1604

if isinstance(id, int):

1605

# rev

1605

# rev

1606

return self.node(id)

1606

return self.node(id)

1607

if len(id) == self.nodeconstants.nodelen:

1607

if len(id) == self.nodeconstants.nodelen:

1608

# possibly a binary node

1608

# possibly a binary node

1609

# odds of a binary node being all hex in ASCII are 1 in 10**25

1609

# odds of a binary node being all hex in ASCII are 1 in 10**25

1610

try:

1610

try:

1611

node = id

1611

node = id

1612

self.rev(node) # quick search the index

1612

self.rev(node) # quick search the index

1613

return node

1613

return node

1614

except error.LookupError:

1614

except error.LookupError:

1615

pass # may be partial hex id

1615

pass # may be partial hex id

1616

try:

1616

try:

1617

# str(rev)

1617

# str(rev)

1618

rev = int(id)

1618

rev = int(id)

1619

if b"%d" % rev != id:

1619

if b"%d" % rev != id:

1620

raise ValueError

1620

raise ValueError

1621

if rev < 0:

1621

if rev < 0:

1622

rev = len(self) + rev

1622

rev = len(self) + rev

1623

if rev < 0 or rev >= len(self):

1623

if rev < 0 or rev >= len(self):

1624

raise ValueError

1624

raise ValueError

1625

return self.node(rev)

1625

return self.node(rev)

1626

except (ValueError, OverflowError):

1626

except (ValueError, OverflowError):

1627

pass

1627

pass

1628

if len(id) == 2 * self.nodeconstants.nodelen:

1628

if len(id) == 2 * self.nodeconstants.nodelen:

1629

try:

1629

try:

1630

# a full hex nodeid?

1630

# a full hex nodeid?

1631

node = bin(id)

1631

node = bin(id)

1632

self.rev(node)

1632

self.rev(node)

1633

return node

1633

return node

1634

except (binascii.Error, error.LookupError):

1634

except (binascii.Error, error.LookupError):

1635

pass

1635

pass

1636

1637

def _partialmatch(self, id):

1637

def _partialmatch(self, id):

1638

# we don't care wdirfilenodeids as they should be always full hash

1638

# we don't care wdirfilenodeids as they should be always full hash

1639

maybewdir = self.nodeconstants.wdirhex.startswith(id)

1639

maybewdir = self.nodeconstants.wdirhex.startswith(id)

1640

ambiguous = False

1640

ambiguous = False

1641

try:

1641

try:

1642

partial = self.index.partialmatch(id)

1642

partial = self.index.partialmatch(id)

1643

if partial and self.hasnode(partial):

1643

if partial and self.hasnode(partial):

1644

if maybewdir:

1644

if maybewdir:

1645

# single 'ff...' match in radix tree, ambiguous with wdir

1645

# single 'ff...' match in radix tree, ambiguous with wdir

1646

ambiguous = True

1646

ambiguous = True

1647

else:

1647

else:

1648

return partial

1648

return partial

1649

elif maybewdir:

1649

elif maybewdir:

1650

# no 'ff...' match in radix tree, wdir identified

1650

# no 'ff...' match in radix tree, wdir identified

1651

raise error.WdirUnsupported

1651

raise error.WdirUnsupported

1652

else:

1652

else:

1653

return None

1653

return None

1654

except error.RevlogError:

1654

except error.RevlogError:

1655

# parsers.c radix tree lookup gave multiple matches

1655

# parsers.c radix tree lookup gave multiple matches

1656

# fast path: for unfiltered changelog, radix tree is accurate

1656

# fast path: for unfiltered changelog, radix tree is accurate

1657

if not getattr(self, 'filteredrevs', None):

1657

if not getattr(self, 'filteredrevs', None):

1658

ambiguous = True

1658

ambiguous = True

1659

# fall through to slow path that filters hidden revisions

1659

# fall through to slow path that filters hidden revisions

1660

except (AttributeError, ValueError):

1660

except (AttributeError, ValueError):

1661

# we are pure python, or key is not hex

1661

# we are pure python, or key is not hex

1662

pass

1662

pass

1663

if ambiguous:

1663

if ambiguous:

1664

raise error.AmbiguousPrefixLookupError(

1664

raise error.AmbiguousPrefixLookupError(

1665

id, self.display_id, _(b'ambiguous identifier')

1665

id, self.display_id, _(b'ambiguous identifier')

1666

)

1666

)

1667

1668

if id in self._pcache:

1668

if id in self._pcache:

1669

return self._pcache[id]

1669

return self._pcache[id]

1670

1671

if len(id) <= 40:

1671

if len(id) <= 40:

1672

# hex(node)[:...]

1672

# hex(node)[:...]

1673

l = len(id) // 2 * 2 # grab an even number of digits

1673

l = len(id) // 2 * 2 # grab an even number of digits

1674

try:

1674

try:

1675

# we're dropping the last digit, so let's check that it's hex,

1675

# we're dropping the last digit, so let's check that it's hex,

1676

# to avoid the expensive computation below if it's not

1676

# to avoid the expensive computation below if it's not

1677

if len(id) % 2 > 0:

1677

if len(id) % 2 > 0:

1678

if not (id[-1] in hexdigits):

1678

if not (id[-1] in hexdigits):

1679

return None

1679

return None

1680

prefix = bin(id[:l])

1680

prefix = bin(id[:l])

1681

except binascii.Error:

1681

except binascii.Error:

1682

pass

1682

pass

1683

else:

1683

else:

1684

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1684

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1685

nl = [

1685

nl = [

1686

n for n in nl if hex(n).startswith(id) and self.hasnode(n)

1686

n for n in nl if hex(n).startswith(id) and self.hasnode(n)

1687

]

1687

]

1688

if self.nodeconstants.nullhex.startswith(id):

1688

if self.nodeconstants.nullhex.startswith(id):

1689

nl.append(self.nullid)

1689

nl.append(self.nullid)

1690

if len(nl) > 0:

1690

if len(nl) > 0:

1691

if len(nl) == 1 and not maybewdir:

1691

if len(nl) == 1 and not maybewdir:

1692

self._pcache[id] = nl[0]

1692

self._pcache[id] = nl[0]

1693

return nl[0]

1693

return nl[0]

1694

raise error.AmbiguousPrefixLookupError(

1694

raise error.AmbiguousPrefixLookupError(

1695

id, self.display_id, _(b'ambiguous identifier')

1695

id, self.display_id, _(b'ambiguous identifier')

1696

)

1696

)

1697

if maybewdir:

1697

if maybewdir:

1698

raise error.WdirUnsupported

1698

raise error.WdirUnsupported

1699

return None

1699

return None

1700

1701

def lookup(self, id):
    """Locate a node based on:

    - revision number or str(revision number)
    - nodeid or subset of hex nodeid

    Raises ``error.LookupError`` when neither the exact nor the prefix
    match finds anything.
    """
    exact = self._match(id)
    if exact is not None:
        return exact

    bypartial = self._partialmatch(id)
    if bypartial:
        return bypartial

    raise error.LookupError(id, self.display_id, _(b'no match found'))

1714

1715

def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node.

    The result is a prefix of ``hex(node)`` at least ``minlength`` long.
    Raises ``error.LookupError`` when ``node`` is unknown.
    """

    def resolves(prefix):
        """Tell whether ``prefix`` is unambiguous; raise if unknown."""
        try:
            matched = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if matched is not None:
            return True
        raise error.LookupError(node, self.display_id, _(b'no node'))

    def allf(prefix):
        """Tell whether ``prefix`` consists of 'f' digits only."""
        for char in pycompat.iterbytestr(prefix):
            if char != b'f':
                return False
        return True

    hexnode = hex(node)

    def notwdir(hexnode, minlength):
        """Disambiguate against wdirid."""
        for size in range(minlength, len(hexnode) + 1):
            candidate = hexnode[:size]
            if not allf(candidate):
                return candidate
        return None

    if not getattr(self, 'filteredrevs', None):
        # nothing is filtered: the index can answer directly
        try:
            size = max(self.index.shortest(node), minlength)
            return notwdir(hexnode, size)
        except error.RevlogError:
            if node != self.nodeconstants.wdirid:
                raise error.LookupError(
                    node, self.display_id, _(b'no node')
                )
        except AttributeError:
            # Fall through to pure code
            pass

    sizes = range(minlength, len(hexnode) + 1)

    if node == self.nodeconstants.wdirid:
        for size in sizes:
            candidate = hexnode[:size]
            if resolves(candidate):
                return candidate

    for size in sizes:
        if resolves(hexnode[:size]):
            return notwdir(hexnode, size)

1765

1766

def cmp(self, node, text):
    """Compare ``text`` with the file revision identified by ``node``.

    Returns True if text is different than what is stored. The check
    hashes ``text`` with the parents of ``node`` and compares the result
    to ``node``; the stored text itself is not read here.
    """
    parent1, parent2 = self.parents(node)
    expected = storageutil.hashrevisionsha1(text, parent1, parent2)
    return expected != node

1773

1774

def _getsegmentforrevs(self, startrev, endrev, df=None):

1774

def _getsegmentforrevs(self, startrev, endrev, df=None):

1775

"""Obtain a segment of raw data corresponding to a range of revisions.

1775

"""Obtain a segment of raw data corresponding to a range of revisions.

1776

1777

Accepts the start and end revisions and an optional already-open

1777

Accepts the start and end revisions and an optional already-open

1778

file handle to be used for reading. If the file handle is read, its

1778

file handle to be used for reading. If the file handle is read, its

1779

seek position will not be preserved.

1779

seek position will not be preserved.

1780

1781

Requests for data may be satisfied by a cache.

1781

Requests for data may be satisfied by a cache.

1782

1783

Returns a 2-tuple of (offset, data) for the requested range of

1783

Returns a 2-tuple of (offset, data) for the requested range of

1784

revisions. Offset is the integer offset from the beginning of the

1784

revisions. Offset is the integer offset from the beginning of the

1785

revlog and data is a str or buffer of the raw byte data.

1785

revlog and data is a str or buffer of the raw byte data.

1786

1787

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1787

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1788

to determine where each revision's data begins and ends.

1788

to determine where each revision's data begins and ends.

1789

"""

1789

"""

1790

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1790

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1791

# (functions are expensive).

1791

# (functions are expensive).

1792

index = self.index

1792

index = self.index

1793

istart = index[startrev]

1793

istart = index[startrev]

1794

start = int(istart[0] >> 16)

1794

start = int(istart[0] >> 16)

1795

if startrev == endrev:

1795

if startrev == endrev:

1796

end = start + istart[1]

1796

end = start + istart[1]

1797

else:

1797

else:

1798

iend = index[endrev]

1798

iend = index[endrev]

1799

end = int(iend[0] >> 16) + iend[1]

1799

end = int(iend[0] >> 16) + iend[1]

1800

1801

if self._inline:

1801

if self._inline:

1802

start += (startrev + 1) * self.index.entry_size

1802

start += (startrev + 1) * self.index.entry_size

1803

end += (endrev + 1) * self.index.entry_size

1803

end += (endrev + 1) * self.index.entry_size

1804

length = end - start

1804

length = end - start

1805

1806

return start, self._segmentfile.read_chunk(start, length, df)

1806

return start, self._segmentfile.read_chunk(start, length, df)

1807

1808

def _chunk(self, rev, df=None):
    """Obtain a single decompressed chunk for a revision.

    Accepts an integer revision and an optional already-open file handle
    to be used for reading. If used, the seek position of the file will not
    be preserved.

    Returns a str holding uncompressed data for the requested revision.
    Raises ``error.RevlogError`` for an unknown compression mode.
    """
    # field 10 of the index entry selects how the chunk was compressed
    mode = self.index[rev][10]
    segment = self._getsegmentforrevs(rev, rev, df=df)[1]

    if mode == COMP_MODE_PLAIN:
        return segment
    if mode == COMP_MODE_DEFAULT:
        return self._decompressor(segment)
    if mode == COMP_MODE_INLINE:
        return self.decompress(segment)

    raise error.RevlogError(b'unknown compression mode %d' % mode)

1829

1830

def _chunks(self, revs, df=None, targetsize=None):

1830

def _chunks(self, revs, df=None, targetsize=None):

1831

"""Obtain decompressed chunks for the specified revisions.

1831

"""Obtain decompressed chunks for the specified revisions.

1832

1833

Accepts an iterable of numeric revisions that are assumed to be in

1833

Accepts an iterable of numeric revisions that are assumed to be in

1834

ascending order. Also accepts an optional already-open file handle

1834

ascending order. Also accepts an optional already-open file handle

1835

to be used for reading. If used, the seek position of the file will

1835

to be used for reading. If used, the seek position of the file will

1836

not be preserved.

1836

not be preserved.

1837

1838

This function is similar to calling ``self._chunk()`` multiple times,

1838

This function is similar to calling ``self._chunk()`` multiple times,

1839

but is faster.

1839

but is faster.

1840

1841

Returns a list with decompressed data for each requested revision.

1841

Returns a list with decompressed data for each requested revision.

1842

"""

1842

"""

1843

if not revs:

1843

if not revs:

1844

return []

1844

return []

1845

start = self.start

1845

start = self.start

1846

length = self.length

1846

length = self.length

1847

inline = self._inline

1847

inline = self._inline

1848

iosize = self.index.entry_size

1848

iosize = self.index.entry_size

1849

buffer = util.buffer

1849

buffer = util.buffer

1850

1851

l = []

1851

l = []

1852

ladd = l.append

1852

ladd = l.append

1853

1854

if not self._withsparseread:

1854

if not self._withsparseread:

1855

slicedchunks = (revs,)

1855

slicedchunks = (revs,)

1856

else:

1856

else:

1857

slicedchunks = deltautil.slicechunk(

1857

slicedchunks = deltautil.slicechunk(

1858

self, revs, targetsize=targetsize

1858

self, revs, targetsize=targetsize

1859

)

1859

)

1860

1861

for revschunk in slicedchunks:

1861

for revschunk in slicedchunks:

1862

firstrev = revschunk[0]

1862

firstrev = revschunk[0]

1863

# Skip trailing revisions with empty diff

1863

# Skip trailing revisions with empty diff

1864

for lastrev in revschunk[::-1]:

1864

for lastrev in revschunk[::-1]:

1865

if length(lastrev) != 0:

1865

if length(lastrev) != 0:

1866

break

1866

break

1867

1868

try:

1868

try:

1869

offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)

1869

offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)

1870

except OverflowError:

1870

except OverflowError:

1871

# issue4215 - we can't cache a run of chunks greater than

1871

# issue4215 - we can't cache a run of chunks greater than

1872

# 2G on Windows

1872

# 2G on Windows

1873

return [self._chunk(rev, df=df) for rev in revschunk]

1873

return [self._chunk(rev, df=df) for rev in revschunk]

1874

1875

decomp = self.decompress

1875

decomp = self.decompress

1876

# self._decompressor might be None, but will not be used in that case

1876

# self._decompressor might be None, but will not be used in that case

1877

def_decomp = self._decompressor

1877

def_decomp = self._decompressor

1878

for rev in revschunk:

1878

for rev in revschunk:

1879

chunkstart = start(rev)

1879

chunkstart = start(rev)

1880

if inline:

1880

if inline:

1881

chunkstart += (rev + 1) * iosize

1881

chunkstart += (rev + 1) * iosize

1882

chunklength = length(rev)

1882

chunklength = length(rev)

1883

comp_mode = self.index[rev][10]

1883

comp_mode = self.index[rev][10]

1884

c = buffer(data, chunkstart - offset, chunklength)

1884

c = buffer(data, chunkstart - offset, chunklength)

1885

if comp_mode == COMP_MODE_PLAIN:

1885

if comp_mode == COMP_MODE_PLAIN:

1886

ladd(c)

1886

ladd(c)

1887

elif comp_mode == COMP_MODE_INLINE:

1887

elif comp_mode == COMP_MODE_INLINE:

1888

ladd(decomp(c))

1888

ladd(decomp(c))

1889

elif comp_mode == COMP_MODE_DEFAULT:

1889

elif comp_mode == COMP_MODE_DEFAULT:

1890

ladd(def_decomp(c))

1890

ladd(def_decomp(c))

1891

else:

1891

else:

1892

msg = b'unknown compression mode %d'

1892

msg = b'unknown compression mode %d'

1893

msg %= comp_mode

1893

msg %= comp_mode

1894

raise error.RevlogError(msg)

1894

raise error.RevlogError(msg)

1895

1896

return l

1896

return l

1897

1898

def deltaparent(self, rev):
    """Return the revision that ``rev`` is stored as a delta against.

    ``nullrev`` is returned when the entry's base (field 3) is ``rev``
    itself. Otherwise the base is returned when general delta is enabled
    and ``rev - 1`` when it is not.
    """
    base = self.index[rev][3]
    if base == rev:
        return nullrev
    return base if self._generaldelta else rev - 1

1907

1908

def issnapshot(self, rev):
    """Tell whether ``rev`` is a snapshot.

    Without sparse-revlog, a revision is a snapshot exactly when its delta
    parent is ``nullrev``. With sparse-revlog, the index's own
    ``issnapshot`` is used when present; otherwise the delta base is
    checked against the revision's parents as described below.
    """
    if not self._sparserevlog:
        return self.deltaparent(rev) == nullrev
    if hasattr(self.index, 'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)

    if rev == nullrev:
        return True
    entry = self.index[rev]
    base = entry[3]
    if base == rev or base == nullrev:
        return True

    def skipempty(parent):
        """Follow delta parents while the revision's length is zero."""
        while self.length(parent) == 0:
            deltabase = self.deltaparent(parent)
            if deltabase == parent:
                break
            parent = deltabase
        return parent

    p1 = skipempty(entry[5])
    p2 = skipempty(entry[6])
    if base == p1 or base == p2:
        # a delta against one of the parents is not a snapshot
        return False
    # otherwise the answer is inherited from the delta base
    return self.issnapshot(base)

1939

1940

def snapshotdepth(self, rev):
    """Return the number of snapshots in the chain before this one.

    Raises ``error.ProgrammingError`` when ``rev`` is not a snapshot.
    """
    if not self.issnapshot(rev):
        # interpolate the revision so the message names the culprit
        # instead of showing a literal "%d"
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    # the first element of ``_deltachain``'s result is the chain itself,
    # which includes ``rev``; do not count it
    return len(self._deltachain(rev)[0]) - 1

1945

1946

def revdiff(self, rev1, rev2):
    """Return or calculate a delta between two revisions.

    The delta calculated is in binary form and is intended to be written to
    revlog data directly. So this function needs raw revision data.
    """
    # when rev2 is already stored as a delta against rev1, reuse that chunk
    stored = rev1 != nullrev and self.deltaparent(rev2) == rev1
    if stored:
        return bytes(self._chunk(rev2))

    return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

1956

1957

def revision(self, nodeorrev):
    """Return the uncompressed revision for a node or revision number.

    This is a thin front end to ``_revisiondata`` with its defaults.
    """
    text = self._revisiondata(nodeorrev)
    return text

1962

1963

def sidedata(self, nodeorrev):
    """Return a map of extra data related to the changeset but not part
    of the hash.

    This function currently returns a dictionary. However, a more advanced
    mapping object will likely be used in the future for more
    efficient/lazy code.
    """
    # accept either a revision number or a node
    rev = nodeorrev if isinstance(nodeorrev, int) else self.rev(nodeorrev)
    return self._sidedata(rev)

1976

1977

def _revisiondata(self, nodeorrev, _df=None, raw=False):
    """Return the text, or raw text, of a revision.

    ``nodeorrev`` is a revision number (``int``) or a node. ``_df`` is
    forwarded unchanged to ``self._rawtext``. With ``raw`` true the raw
    text is returned and flags go through ``flagutil.processflagsraw``;
    otherwise ``flagutil.processflagsread`` produces the returned text.

    ``b""`` is returned for ``self.nullid``. When the raw text did not
    come back as already validated, ``self._revisioncache`` is set to
    ``(node, rev, rawtext)`` after the optional hash check.
    """
    # deal with <nodeorrev> argument type
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        node = nodeorrev
        # resolved later, only if actually needed
        rev = None

    # fast path the special `nullid` rev
    if node == self.nullid:
        return b""

    # ``rawtext`` is the text as stored inside the revlog. Might be the
    # revision or might need to be processed to retrieve the revision.
    rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

    if raw and validated:
        # if we don't want to process the raw text and that raw
        # text is cached, we can exit early.
        return rawtext
    if rev is None:
        rev = self.rev(node)
    # the revlog's flag for this revision
    # (usually alter its state or content)
    flags = self.flags(rev)

    if validated and flags == REVIDX_DEFAULT_FLAGS:
        # no extra flags set, no flag processor runs, text = rawtext
        return rawtext

    if raw:
        validatehash = flagutil.processflagsraw(self, rawtext, flags)
        text = rawtext
    else:
        r = flagutil.processflagsread(self, rawtext, flags)
        text, validatehash = r
    if validatehash:
        self.checkhash(text, node, rev=rev)
    if not validated:
        # ``_rawtext`` cleared the cache on this path; repopulate it now
        # that the hash check (if any) has passed
        self._revisioncache = (node, rev, rawtext)

    return text

2020

2021

def _rawtext(self, node, rev, _df=None):
    """return the possibly unvalidated rawtext for a revision

    ``rev`` may be ``None``, in which case it is resolved from ``node``
    unless the revision cache already holds ``node``. ``_df`` is passed
    to ``self._chunks`` as ``df``.

    returns (rev, rawtext, validated)

    ``validated`` is ``True`` only when the text comes straight from
    ``self._revisioncache``; in that case ``rev`` is returned exactly as
    it was passed in (possibly ``None``).
    """

    # revision in the cache (could be useful to apply delta)
    cachedrev = None
    # An intermediate text to apply deltas to
    basetext = None

    # Check if we have the entry in cache
    # The cache entry looks like (node, rev, rawtext)
    if self._revisioncache:
        if self._revisioncache[0] == node:
            return (rev, self._revisioncache[2], True)
        cachedrev = self._revisioncache[1]

    if rev is None:
        rev = self.rev(node)

    # ``stopped`` tells whether the chain walk ended at ``cachedrev``, in
    # which case the cached text is the base to patch from
    chain, stopped = self._deltachain(rev, stoprev=cachedrev)
    if stopped:
        basetext = self._revisioncache[2]

    # drop cache to save memory, the caller is expected to
    # update self._revisioncache after validating the text
    self._revisioncache = None

    targetsize = None
    rawsize = self.index[rev][2]
    if 0 <= rawsize:
        # NOTE(review): the 4x factor looks like a sizing hint for
        # ``_chunks`` -- confirm against its documentation
        targetsize = 4 * rawsize

    bins = self._chunks(chain, df=_df, targetsize=targetsize)
    if basetext is None:
        # no cached base: the first chunk is the base, the rest are deltas
        basetext = bytes(bins[0])
        bins = bins[1:]

    rawtext = mdiff.patches(basetext, bins)
    del basetext  # let us have a chance to free memory early
    return (rev, rawtext, False)

2063

2064

def _sidedata(self, rev):
    """Return the sidedata stored for revision number ``rev``.

    An empty dict is returned when the index records a zero sidedata size.
    ``error.RevlogError`` is raised when the recorded span extends past
    the docket's ``sidedata_end`` or when the compression mode found in
    the index is not one of the known modes.
    """
    entry = self.index[rev]
    offset, size = entry[8], entry[9]

    if self._inline:
        offset += self.index.entry_size * (1 + rev)
    if size == 0:
        return {}

    if offset + size > self._docket.sidedata_end:
        details = (
            self._sidedatafile,
            size,
            offset,
            self._docket.sidedata_end,
        )
        raise error.RevlogError(FILE_TOO_SHORT_MSG % details)

    raw_segment = self._segmentfile_sidedata.read_chunk(offset, size)

    mode = self.index[rev][11]
    if mode == COMP_MODE_PLAIN:
        segment = raw_segment
    elif mode == COMP_MODE_DEFAULT:
        segment = self._decompressor(raw_segment)
    elif mode == COMP_MODE_INLINE:
        segment = self.decompress(raw_segment)
    else:
        raise error.RevlogError(b'unknown compression mode %d' % mode)

    return sidedatautil.deserialize_sidedata(segment)

2101

2102

def rawdata(self, nodeorrev):
    """Return the uncompressed raw data for a node or revision number.

    This delegates to ``_revisiondata`` with ``raw=True``.
    """
    raw = self._revisiondata(nodeorrev, raw=True)
    return raw

2105

2106

def hash(self, text, p1, p2):
    """Compute the node hash of ``text`` with parents ``p1`` and ``p2``.

    Exposed as a method so that subclasses can replace the hash as
    needed.
    """
    digest = storageutil.hashrevisionsha1(text, p1, p2)
    return digest

2113

2114

def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Verify that ``node`` is the hash of ``text`` and its parents.

    When both ``p1`` and ``p2`` are ``None`` the parents are looked up
    with ``self.parents(node)``. On mismatch ``error.RevlogError`` is
    raised, or ``error.CensoredNodeError`` when the revlog is censorable
    and ``text`` is censored text. ``rev`` is only used to label the
    error message.

    Exposed as a method so that subclasses can extend hash mismatch
    behaviors as needed.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node == self.hash(text, p1, p2):
            return

        # Hash failure: clear the revision cache. The cache only stores
        # the raw revision, so clearing it means no cache hit the next
        # time the raw revision data is accessed. This case should be
        # rare, and teaching the cache about the hash verification state
        # would be extra work.
        cached = self._revisioncache
        if cached and cached[0] == node:
            self._revisioncache = None

        label = rev
        if label is None:
            label = templatefilters.short(hex(node))
        raise error.RevlogError(
            _(b"integrity check failed on %s:%s")
            % (self.display_id, pycompat.bytestr(label))
        )
    except error.RevlogError:
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self.display_id, node, text)
        raise

2144

2145

@property
def _split_index_file(self):
    """Path where the index of an ongoing splitting operation is expected.

    The file only exists while a splitting operation is in progress, but
    it is always expected at the same location.
    """
    radix = self.radix
    segments = radix.split(b'/')
    if len(segments) <= 1:
        # the revlog is stored at the root of the store (changelog or
        # manifest), no risk of collision.
        return radix + b'.i.s'

    # append '-s' to the leading directory (the `data/` or `meta/` base)
    # and the index extension to the final component
    segments[0] += b'-s'
    segments[-1] += b'.i'
    return b'/'.join(segments)

2163

2164

def _enforceinlinesize(self, tr, side_write=True):
    """Check if the revlog is too big for inline and convert if so.

    This should be called after revisions are added to the revlog. If the
    revlog has grown too large to be an inline revlog, it will convert it
    to use multiple index and data files.

    ``tr`` is the transaction the files are registered with. When
    ``side_write`` is true the new index is written at
    ``self._split_index_file`` and renamed over the real index by a
    callback registered with ``tr.addfinalize``; an abort callback
    restores ``self._indexfile``.

    Raises ``error.RevlogError`` if the index file is not known to the
    transaction.
    """
    tiprev = len(self) - 1
    total_size = self.start(tiprev) + self.length(tiprev)
    # nothing to do for a non-inline revlog or one still under the limit
    if not self._inline or total_size < _maxinline:
        return

    troffset = tr.findoffset(self._indexfile)
    if troffset is None:
        raise error.RevlogError(
            _(b"%s not found in the transaction") % self._indexfile
        )
    if troffset:
        tr.addbackup(self._indexfile, for_offset=True)
    tr.add(self._datafile, 0)

    existing_handles = False
    if self._writinghandles is not None:
        existing_handles = True
        fp = self._writinghandles[0]
        fp.flush()
        fp.close()
        # We can't use the cached file handle after close(). So prevent
        # its usage.
        self._writinghandles = None
        self._segmentfile.writing_handle = None
        # No need to deal with sidedata writing handle as it is only
        # relevant with revlog-v2 which is never inline, not reaching
        # this code
    if side_write:
        old_index_file_path = self._indexfile
        new_index_file_path = self._split_index_file
        opener = self.opener
        # weak reference so the transaction callbacks do not keep this
        # revlog alive
        weak_self = weakref.ref(self)

        # the "split" index replace the real index when the transaction is finalized
        def finalize_callback(tr):
            opener.rename(
                new_index_file_path,
                old_index_file_path,
                checkambig=True,
            )
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path

        def abort_callback(tr):
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path

        tr.registertmp(new_index_file_path)
        if self.target[1] is not None:
            callback_id = b'000-revlog-split-%d-%s' % self.target
        else:
            callback_id = b'000-revlog-split-%d' % self.target[0]
        tr.addfinalize(callback_id, finalize_callback)
        tr.addabort(callback_id, abort_callback)

    new_dfh = self._datafp(b'w+')
    new_dfh.truncate(0)  # drop any potentially existing data
    try:
        # copy every revision's segment out of the inline file into the
        # new data file
        with self.reading():
            for r in self:
                new_dfh.write(self._getsegmentforrevs(r, r)[1])
            new_dfh.flush()

        if side_write:
            self._indexfile = new_index_file_path
        with self.__index_new_fp() as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0 and self._docket is None:
                    # without a docket, the format header is prepended to
                    # the first index entry
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)
            if self._docket is not None:
                self._docket.index_end = fp.tell()

            # If we don't use side-write, the temp file replace the real
            # index when we exit the context manager

        nodemaputil.setup_persistent_nodemap(tr, self)
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            self._datafile,
            self._chunkcachesize,
        )

        if existing_handles:
            # switched from inline to conventional reopen the index
            ifh = self.__index_write_fp()
            self._writinghandles = (ifh, new_dfh, None)
            self._segmentfile.writing_handle = new_dfh
            # ownership moved to ``_writinghandles``; keep ``finally`` from
            # closing the handle
            new_dfh = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
    finally:
        if new_dfh is not None:
            new_dfh.close()

2273

2274

def _nodeduplicatecallback(self, transaction, node):
    """Hook called when trying to add a node that is already stored.

    This implementation does nothing and returns ``None``.
    """
    return None

2276

2277

@contextlib.contextmanager
def reading(self):
    """Context manager keeping the data and sidedata files open for reading.

    With an empty index there is nothing to read, so no file is opened.
    """
    if len(self.index) == 0:
        yield  # nothing to be read
        return

    data_file = self._segmentfile
    sidedata_file = self._segmentfile_sidedata
    with data_file.reading(), sidedata_file.reading():
        yield

2286

2287

@contextlib.contextmanager
def _writing(self, transaction):
    """Context manager exposing open file handles for writing.

    Raises ``error.ProgrammingError`` when the revlog is flagged
    ``_trypending``. If writing handles are already set, the context
    simply yields. Otherwise the data file (non-inline revlogs only), the
    sidedata file (when one is configured) and the index file are opened
    and registered with ``transaction``, and published through
    ``self._writinghandles`` and the segment files' ``writing_handle``.
    After the body runs, the docket, if any, is written. All handles are
    reset and closed on exit, whether or not the body raised.
    """
    if self._trypending:
        msg = b'try to write in a `trypending` revlog: %s'
        msg %= self.display_id
        raise error.ProgrammingError(msg)
    if self._writinghandles is not None:
        yield
    else:
        ifh = dfh = sdfh = None
        try:
            r = len(self)
            # opening the data file.
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except FileNotFoundError:
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            if self._sidedatafile is not None:
                # revlog-v2 does not inline, help Pytype
                assert dfh is not None
                try:
                    sdfh = self.opener(self._sidedatafile, mode=b"r+")
                    # position the *sidedata* handle at the end of the
                    # sidedata; the data handle must stay where it was
                    # positioned above
                    sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                except FileNotFoundError:
                    sdfh = self.opener(self._sidedatafile, mode=b"w+")
                transaction.add(
                    self._sidedatafile, self._docket.sidedata_end
                )

            # opening the index file.
            isize = r * self.index.entry_size
            ifh = self.__index_write_fp()
            if self._inline:
                # inline revlogs keep the data inside the index file
                transaction.add(self._indexfile, dsize + isize)
            else:
                transaction.add(self._indexfile, isize)
            # exposing all file handle for writing.
            self._writinghandles = (ifh, dfh, sdfh)
            self._segmentfile.writing_handle = ifh if self._inline else dfh
            self._segmentfile_sidedata.writing_handle = sdfh
            yield
            if self._docket is not None:
                self._write_docket(transaction)
        finally:
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            self._segmentfile_sidedata.writing_handle = None
            if dfh is not None:
                dfh.close()
            if sdfh is not None:
                sdfh.close()
            # closing the index file last to avoid exposing referent to
            # potential unflushed data content.
            if ifh is not None:
                ifh.close()

2352

2353

def _write_docket(self, transaction):
    """Write the current docket to disk.

    Exists as a method to help changelog implement its transaction logic.

    We could also imagine using the same transaction logic for all
    revlogs, since dockets are cheap.
    """
    docket = self._docket
    docket.write(transaction)

2361

2362

def addrevision(
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=REVIDX_DEFAULT_FLAGS,
    deltacomputer=None,
    sidedata=None,
):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    sidedata - optional sidedata mapping; rejected when non-empty and the
        revlog does not support sidedata

    Returns the revision number: the existing one when the node is already
    in the index, otherwise the result of ``addrawrevision``.
    """
    if link == nullrev:
        raise error.RevlogError(
            _(b"attempted to add linkrev -1 to %s") % self.display_id
        )

    if sidedata is None:
        sidedata = {}
    elif sidedata and not self.hassidedata:
        raise error.ProgrammingError(
            _(b"trying to add sidedata to a revlog who don't support them")
        )

    # with flags set, the node is derived from the text *before* the flag
    # processors transform it
    if flags and not node:
        node = self.hash(text, p1, p2)

    stored_text, must_check = flagutil.processflagswrite(self, text, flags)

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if stored_text != text:
        cachedelta = None

    if len(stored_text) > _maxentrysize:
        raise error.RevlogError(
            _(
                b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
            )
            % (self.display_id, len(stored_text))
        )

    if not node:
        node = self.hash(stored_text, p1, p2)

    known_rev = self.index.get_rev(node)
    if known_rev is not None:
        return known_rev

    if must_check:
        self.checkhash(stored_text, node, p1=p1, p2=p2)

    return self.addrawrevision(
        stored_text,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=cachedelta,
        deltacomputer=deltacomputer,
        sidedata=sidedata,
    )

2439

2440

def addrawrevision(
    self,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    node,
    flags,
    cachedelta=None,
    deltacomputer=None,
    sidedata=None,
):
    """Add a raw revision whose flags, node and parents are already known.

    Useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle). The work is done by
    ``_addrevision`` inside the ``_writing`` context of ``transaction``.
    """
    optional = {
        'deltacomputer': deltacomputer,
        'sidedata': sidedata,
    }
    with self._writing(transaction):
        added = self._addrevision(
            node,
            rawtext,
            transaction,
            link,
            p1,
            p2,
            flags,
            cachedelta,
            **optional
        )
        return added

2470

2471

def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, data)`` pair of bytes. ``header`` is ``b'u'``
    when the data is stored uncompressed and needs an explicit marker,
    and ``b''`` in every other case.
    """
    if not data:
        return b'', data

    compressed = self._compressor.compress(data)

    if compressed:
        # The revlog compressor added the header in the returned data.
        return b'', compressed

    # The compressor returned nothing usable: store the data as is. Data
    # starting with a NUL byte needs no marker (see `decompress`, which
    # returns such chunks untouched); anything else gets a b'u' header.
    if data[0:1] == b'\0':
        return b'', data
    return b'u', data

2485

2486

def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.

    Empty chunks and chunks starting with a NUL byte are returned as is.
    A zlib failure on an ``x`` chunk is re-raised as ``error.RevlogError``.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # We can make a few assumptions about revlogs:
    #
    # 1) the majority of chunks will be compressed (as opposed to inline
    #    raw data).
    # 2) decompressing *any* data will likely be at least 10x slower than
    #    returning raw inline data.
    # 3) we want to prioritize common and officially supported compression
    #    engines
    #
    # It follows that we want to optimize for "decompress compressed data
    # when encoded with common and officially supported compression engines"
    # case over "raw data" and "data encoded by less common or non-official
    # compression engines." That is why we have the inline lookup first
    # followed by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    t = data[0:1]

    if t == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(e)
            )
    # '\0' is more common than 'u' so it goes first.
    elif t == b'\0':
        return data
    elif t == b'u':
        # skip the one-byte b'u' marker without copying the payload
        return util.buffer(data, 1)

    # any other header byte is routed to the engine registered for it
    compressor = self._get_decompressor(t)

    return compressor.decompress(data)

2535

2536

def _addrevision(
    self,
    node,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    flags,
    cachedelta,
    alwayscache=False,
    deltacomputer=None,
    sidedata=None,
):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.

    note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

    if "deltacomputer" is not provided or None, a defaultdeltacomputer will
    be used.

    invariants:
    - rawtext is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.

    Returns the revision number of the newly added revision. Raises
    ``error.RevlogError`` for the null or working-directory node, and
    ``error.ProgrammingError`` when called outside ``revlog._writing``.
    """
    if node == self.nullid:
        raise error.RevlogError(
            _(b"%s: attempt to add null revision") % self.display_id
        )
    if (
        node == self.nodeconstants.wdirid
        or node in self.nodeconstants.wdirfilenodeids
    ):
        raise error.RevlogError(
            _(b"%s: attempt to add wdir revision") % self.display_id
        )
    if self._writinghandles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)

    # one-item list handed to `revisioninfo` and read back as `btext[0]`
    # after the entry has been written
    btext = [rawtext]

    curr = len(self)
    prev = curr - 1

    offset = self._get_data_offset(prev)

    if self._concurrencychecker:
        ifh, dfh, sdfh = self._writinghandles
        # XXX no checking for the sidedata file
        if self._inline:
            # offset is "as if" it were in the .d file, so we need to add on
            # the size of the entry metadata.
            self._concurrencychecker(
                ifh, self._indexfile, offset + curr * self.index.entry_size
            )
        else:
            # Entries in the .i are a consistent size.
            self._concurrencychecker(
                ifh, self._indexfile, curr * self.index.entry_size
            )
            self._concurrencychecker(dfh, self._datafile, offset)

    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is None:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(
            revlog.size(self, cachedelta[0]), cachedelta[1]
        )
    else:
        textlen = len(rawtext)

    if deltacomputer is None:
        write_debug = None
        if self._debug_delta:
            write_debug = transaction._report
        deltacomputer = deltautil.deltacomputer(
            self, write_debug=write_debug
        )

    if cachedelta is not None and len(cachedelta) == 2:
        # If the cached delta has no information about how it should be
        # reused, add the default reuse instruction according to the
        # revlog's configuration.
        if self._generaldelta and self._lazydeltabase:
            delta_base_reuse = DELTA_BASE_REUSE_TRY
        else:
            delta_base_reuse = DELTA_BASE_REUSE_NO
        cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

    revinfo = revlogutils.revisioninfo(
        node,
        p1,
        p2,
        btext,
        textlen,
        cachedelta,
        flags,
    )

    deltainfo = deltacomputer.finddeltainfo(revinfo)

    compression_mode = COMP_MODE_INLINE
    if self._docket is not None:
        default_comp = self._docket.default_compression_header
        r = deltautil.delta_compression(default_comp, deltainfo)
        compression_mode, deltainfo = r

    sidedata_compression_mode = COMP_MODE_INLINE
    if sidedata and self.hassidedata:
        sidedata_compression_mode = COMP_MODE_PLAIN
        serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
        sidedata_offset = self._docket.sidedata_end
        h, comp_sidedata = self.compress(serialized_sidedata)
        # only keep the compressed form when it is really compressed
        # (no b'u' marker, no NUL-prefixed raw data) and actually smaller
        if (
            h != b'u'
            and comp_sidedata[0:1] != b'\0'
            and len(comp_sidedata) < len(serialized_sidedata)
        ):
            assert not h
            if (
                comp_sidedata[0:1]
                == self._docket.default_compression_header
            ):
                sidedata_compression_mode = COMP_MODE_DEFAULT
                serialized_sidedata = comp_sidedata
            else:
                sidedata_compression_mode = COMP_MODE_INLINE
                serialized_sidedata = comp_sidedata
    else:
        serialized_sidedata = b""
        # Don't store the offset if the sidedata is empty, that way
        # we can easily detect empty sidedata and they will be no different
        # than ones we manually add.
        sidedata_offset = 0

    rank = RANK_UNKNOWN
    if self._compute_rank:
        if (p1r, p2r) == (nullrev, nullrev):
            rank = 1
        elif p1r != nullrev and p2r == nullrev:
            rank = 1 + self.fast_rank(p1r)
        elif p1r == nullrev and p2r != nullrev:
            rank = 1 + self.fast_rank(p2r)
        else:  # merge node
            if rustdagop is not None and self.index.rust_ext_compat:
                rank = rustdagop.rank(self.index, p1r, p2r)
            else:
                pmin, pmax = sorted((p1r, p2r))
                rank = 1 + self.fast_rank(pmax)
                # the generator's `_` is local to the generator expression
                # and does not rebind the translation function `_`
                rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

    e = revlogutils.entry(
        flags=flags,
        data_offset=offset,
        data_compressed_length=deltainfo.deltalen,
        data_uncompressed_length=textlen,
        data_compression_mode=compression_mode,
        data_delta_base=deltainfo.base,
        link_rev=link,
        parent_rev_1=p1r,
        parent_rev_2=p2r,
        node_id=node,
        sidedata_offset=sidedata_offset,
        sidedata_compressed_length=len(serialized_sidedata),
        sidedata_compression_mode=sidedata_compression_mode,
        rank=rank,
    )

    self.index.append(e)
    entry = self.index.entry_binary(curr)
    if curr == 0 and self._docket is None:
        # first entry of a docket-less revlog: prepend the packed
        # format flags/version header
        header = self._format_flags | self._format_version
        header = self.index.pack_header(header)
        entry = header + entry
    self._writeentry(
        transaction,
        entry,
        deltainfo.data,
        link,
        offset,
        serialized_sidedata,
        sidedata_offset,
    )

    rawtext = btext[0]

    if alwayscache and rawtext is None:
        rawtext = deltacomputer.buildtext(revinfo)

    if type(rawtext) is bytes:  # only accept immutable objects
        self._revisioncache = (node, curr, rawtext)
    self._chainbasecache[curr] = deltainfo.chainbase
    return curr

2737

2738

def _get_data_offset(self, prev):

2738

def _get_data_offset(self, prev):

2739

"""Returns the current offset in the (in-transaction) data file.

2739

"""Returns the current offset in the (in-transaction) data file.

2740

Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket

2740

Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket

2741

file to store that information: since sidedata can be rewritten to the

2741

file to store that information: since sidedata can be rewritten to the

2742

end of the data file within a transaction, you can have cases where, for

2742

end of the data file within a transaction, you can have cases where, for

2743

example, rev `n` does not have sidedata while rev `n - 1` does, leading

2743

example, rev `n` does not have sidedata while rev `n - 1` does, leading

2744

to `n - 1`'s sidedata being written after `n`'s data.

2744

to `n - 1`'s sidedata being written after `n`'s data.

2745

2746

TODO cache this in a docket file before getting out of experimental."""

2746

TODO cache this in a docket file before getting out of experimental."""

2747

if self._docket is None:

2747

if self._docket is None:

2748

return self.end(prev)

2748

return self.end(prev)

2749

else:

2749

else:

2750

return self._docket.data_end

2750

return self._docket.data_end

2751

2752

def _writeentry(
    self, transaction, entry, data, link, offset, sidedata, sidedata_offset
):
    """Write one index entry and its revision data to the open handles.

    ``entry`` is the binary index entry, ``data`` a two-item sequence
    written as ``data[0]`` then ``data[1]``, and ``sidedata`` the
    serialized sidedata bytes (must be empty for inline revlogs). The
    pre-write offsets are registered with ``transaction`` before anything
    is written. ``link`` is accepted but not used here.

    Raises ``error.ProgrammingError`` when called outside
    ``revlog._writing``.
    """
    # Files opened in a+ mode have inconsistent behavior on various
    # platforms. Windows requires that a file positioning call be made
    # when the file handle transitions between reads and writes. See
    # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
    # platforms, Python or the platform itself can be buggy. Some versions
    # of Solaris have been observed to not append at the end of the file
    # if the file was seeked to before the end. See issue4943 for more.
    #
    # We work around this issue by inserting a seek() before writing.
    # Note: This is likely not necessary on Python 3. However, because
    # the file handle is reused for reads and may be seeked there, we need
    # to be careful before changing this.
    if self._writinghandles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)
    ifh, dfh, sdfh = self._writinghandles
    if self._docket is None:
        ifh.seek(0, os.SEEK_END)
    else:
        ifh.seek(self._docket.index_end, os.SEEK_SET)
    if dfh:
        if self._docket is None:
            dfh.seek(0, os.SEEK_END)
        else:
            dfh.seek(self._docket.data_end, os.SEEK_SET)
    if sdfh:
        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

    # the entry was already appended to the index by the caller, so the
    # revision being written is the last one
    curr = len(self) - 1
    if not self._inline:
        transaction.add(self._datafile, offset)
        if self._sidedatafile:
            transaction.add(self._sidedatafile, sidedata_offset)
        transaction.add(self._indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        if sidedata:
            sdfh.write(sidedata)
        ifh.write(entry)
    else:
        # inline: entry and data are interleaved in the index file
        offset += curr * self.index.entry_size
        transaction.add(self._indexfile, offset)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        assert not sidedata
        self._enforceinlinesize(transaction)
    if self._docket is not None:
        # revlog-v2 always has 3 writing handles, help Pytype
        wh1 = self._writinghandles[0]
        wh2 = self._writinghandles[1]
        wh3 = self._writinghandles[2]
        assert wh1 is not None
        assert wh2 is not None
        assert wh3 is not None
        # record the new end of each file in the docket
        self._docket.index_end = wh1.tell()
        self._docket.data_end = wh2.tell()
        self._docket.sidedata_end = wh3.tell()

    nodemaputil.setup_persistent_nodemap(transaction, self)

2816

2817

def addgroup(
    self,
    deltas,
    linkmapper,
    transaction,
    alwayscache=False,
    addrevisioncb=None,
    duplicaterevisioncb=None,
    debug_info=None,
    delta_base_reuse_policy=None,
):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    Each item of ``deltas`` is an 8-tuple ``(node, p1, p2, linknode,
    deltabase, delta, flags, sidedata)``.

    If ``addrevisioncb`` is defined, it will be called with arguments of
    this revlog and the revision number that was added.

    If ``duplicaterevisioncb`` is defined, it will be called with this
    revlog and the existing revision number for every incoming node that
    is already stored.

    Returns True if at least one delta was processed (added or found to
    be a duplicate), False if ``deltas`` was empty.
    """

    if self._adding_group:
        raise error.ProgrammingError(b'cannot nest addgroup() calls')

    # read the default delta-base reuse policy from revlog config if the
    # group did not specify one.
    if delta_base_reuse_policy is None:
        if self._generaldelta and self._lazydeltabase:
            delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
        else:
            delta_base_reuse_policy = DELTA_BASE_REUSE_NO

    self._adding_group = True
    empty = True
    try:
        with self._writing(transaction):
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self,
                write_debug=write_debug,
                debug_info=debug_info,
            )
            # loop through our set of deltas
            for data in deltas:
                (
                    node,
                    p1,
                    p2,
                    linknode,
                    deltabase,
                    delta,
                    flags,
                    sidedata,
                ) = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(
                        oldlen, newlen
                    ):
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta, delta_base_reuse_policy),
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False
    finally:
        # always clear the re-entrancy guard, even when a delta failed
        self._adding_group = False
    return not empty

2942

2943

def iscensored(self, rev):
    """Tell whether revision ``rev`` carries the censored flag.

    A revlog that is not censorable always answers ``False``.  Otherwise
    the result is ``self.flags(rev)`` masked with ``REVIDX_ISCENSORED``:
    truthy when the bit is set, ``0`` when it is clear.
    """
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED
    return False

2949

2950

def _peek_iscensored(self, baserev, delta):
    """Quickly check whether ``delta`` produces a censored revision.

    A revlog that is not censorable always answers ``False``.  Otherwise
    the decision is delegated to ``storageutil.deltaiscensored``, which
    receives the delta, its base revision and ``self.rawsize``.
    """
    if self._censorable:
        return storageutil.deltaiscensored(delta, baserev, self.rawsize)
    return False

2956

2957

def getstrippoint(self, minlink):
    """Find the minimum rev that must be stripped to strip the linkrev.

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    tiprev = len(self) - 1
    heads = self.headrevs()
    return storageutil.resolvestripinfo(
        minlink, tiprev, heads, self.linkrev, self.parentrevs
    )

2970

2971

def strip(self, minlink, transaction):
    """Truncate the revlog on the first revision with a linkrev >= minlink.

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.
    """
    if not len(self):
        return

    striprev, _brokenrevs = self.getstrippoint(minlink)
    if striprev == len(self):
        # nothing at or past the strip point
        return

    # first truncate the files on disk
    data_end = self.start(striprev)
    if self._inline:
        # inline revlogs keep index entries and data in the same file
        index_end = data_end + (striprev * self.index.entry_size)
    else:
        transaction.add(self._datafile, data_end)
        index_end = striprev * self.index.entry_size

    if self._sidedatafile:
        sidedata_end = self.sidedata_cut_off(striprev)
        transaction.add(self._sidedatafile, sidedata_end)

    transaction.add(self._indexfile, index_end)
    docket = self._docket
    if docket is not None:
        # XXX we could leverage the docket while stripping. However it is
        # not powerful enough at the time of this comment.
        # NOTE(review): ``sidedata_end`` is only bound when a sidedata
        # file exists; presumably a docket implies one -- confirm.
        docket.index_end = index_end
        docket.data_end = data_end
        docket.sidedata_end = sidedata_end
        docket.write(transaction, stripping=True)

    # then reset internal state in memory to forget those revisions
    self._revisioncache = None
    self._chaininfocache = util.lrucachedict(500)
    self._segmentfile.clear_cache()
    self._segmentfile_sidedata.clear_cache()

    del self.index[striprev:-1]

3020

3021

def checksize(self):
    """Check size of index and data files.

    Return a ``(dd, di)`` tuple.
    - dd: extra bytes for the "data" file
    - di: extra bytes for the "index" file

    A healthy revlog will return (0, 0).  A missing file counts as zero
    extra bytes for that file.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        dd = actual - expected
    except FileNotFoundError:
        dd = 0

    try:
        # Use a context manager, like the data file above, so the handle
        # is released even when seek()/tell() raise.
        with self.opener(self._indexfile) as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        s = self.index.entry_size
        i = max(0, actual // s)
        di = actual - (i * s)
        if self._inline:
            # Inline revlogs interleave revision data with index entries,
            # so the data is accounted for against the index file size.
            databytes = sum(max(0, self.length(r)) for r in self)
            dd = 0
            di = actual - len(self) * s - databytes
    except FileNotFoundError:
        di = 0

    return (dd, di)

3060

3061

def files(self):
    """Return the list of file paths this revlog refers to.

    The index file always comes first.  Without a docket file, the data
    file is added unless the revlog is inline.  With a docket file, the
    docket file, the old index/data/sidedata paths reported by the docket,
    and the current data and sidedata files (only when their recorded end
    offsets are non-zero) are listed.
    """
    paths = [self._indexfile]

    if self._docket_file is None:
        if not self._inline:
            paths.append(self._datafile)
        return paths

    paths.append(self._docket_file)
    docket = self._docket
    paths.extend(docket.old_index_filepaths(include_empty=False))
    if docket.data_end:
        paths.append(self._datafile)
    paths.extend(docket.old_data_filepaths(include_empty=False))
    if docket.sidedata_end:
        paths.append(self._sidedatafile)
    paths.extend(docket.old_sidedata_filepaths(include_empty=False))
    return paths

3076

3077

def emitrevisions(
    self,
    nodes,
    nodesorder=None,
    revisiondata=False,
    assumehaveparentrevisions=False,
    deltamode=repository.CG_DELTAMODE_STD,
    sidedata_helpers=None,
    debug_info=None,
):
    """Emit revision deltas for ``nodes`` via ``storageutil.emitrevisions``.

    ``nodesorder`` must be ``b'nodes'``, ``b'storage'``, ``b'linear'`` or
    ``None``; any other value raises ``error.ProgrammingError``.  When it
    is ``None`` and the revlog is not generaldelta, storage order is used.

    When the revlog does not store delta chains, ``deltamode`` is forced
    to ``CG_DELTAMODE_FULL`` unless the caller asked for
    ``CG_DELTAMODE_PREV``.
    """
    known_orders = (b'nodes', b'storage', b'linear', None)
    if nodesorder not in known_orders:
        raise error.ProgrammingError(
            b'unhandled value for nodesorder: %s' % nodesorder
        )

    if nodesorder is None and not self._generaldelta:
        nodesorder = b'storage'

    force_full = (
        not self._storedeltachains
        and deltamode != repository.CG_DELTAMODE_PREV
    )
    if force_full:
        deltamode = repository.CG_DELTAMODE_FULL

    return storageutil.emitrevisions(
        self,
        nodes,
        nodesorder,
        revlogrevisiondelta,
        deltaparentfn=self.deltaparent,
        candeltafn=self._candelta,
        rawsizefn=self.rawsize,
        revdifffn=self.revdiff,
        flagsfn=self.flags,
        deltamode=deltamode,
        revisiondata=revisiondata,
        assumehaveparentrevisions=assumehaveparentrevisions,
        sidedata_helpers=sidedata_helpers,
        debug_info=debug_info,
    )

3117

3118

# Delta reuse policies accepted by ``clone`` (described in its docstring).
DELTAREUSEALWAYS = b'always'
DELTAREUSESAMEREVS = b'samerevs'
DELTAREUSENEVER = b'never'

DELTAREUSEFULLADD = b'fulladd'

# Every valid ``deltareuse`` value; ``clone`` rejects anything else.
DELTAREUSEALL = {
    DELTAREUSEALWAYS,
    DELTAREUSESAMEREVS,
    DELTAREUSENEVER,
    DELTAREUSEFULLADD,
}

3125

3126

def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedata_helpers=None,
):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument controls how deltas from the existing
    revlog are preserved in the destination revlog. The argument can have
    the following values:

    DELTAREUSEALWAYS
       Deltas will always be reused (if possible), even if the destination
       revlog would not select the same revisions for the delta. This is the
       fastest mode of operation.
    DELTAREUSESAMEREVS
       Deltas will be reused if the destination revlog would pick the same
       revisions for the delta. This mode strikes a balance between speed
       and optimization.
    DELTAREUSENEVER
       Deltas will never be reused. This is the slowest mode of execution.
       This mode can be used to recompute deltas (e.g. if the diff/delta
       algorithm changes).
    DELTAREUSEFULLADD
       Revisions will be re-added as if they were new content. This is
       slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
       e.g. large file detection and handling.

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``forcedeltabothparents``
    argument controls whether to force compute deltas against both parents
    for merges. By default, the current default is used.

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.

    Raises ``ValueError`` when ``deltareuse`` is not a known policy, when
    the destination is not empty, or when either revlog has filtered
    revisions.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(
            _(b'value for deltareuse invalid: %s') % deltareuse
        )

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # lazydelta and lazydeltabase control whether to reuse a cached delta,
    # if possible.  Remember the destination's settings so they can be
    # restored once the copy is done.
    saved_lazydelta = destrevlog._lazydelta
    saved_lazydeltabase = destrevlog._lazydeltabase
    saved_bothparents = destrevlog._deltabothparents

    # policy -> (lazydeltabase, lazydelta); DELTAREUSEFULLADD leaves the
    # destination's lazy settings untouched.
    lazy_by_policy = {
        self.DELTAREUSEALWAYS: (True, True),
        self.DELTAREUSESAMEREVS: (False, True),
        self.DELTAREUSENEVER: (False, False),
    }

    try:
        if deltareuse in lazy_by_policy:
            (
                destrevlog._lazydeltabase,
                destrevlog._lazydelta,
            ) = lazy_by_policy[deltareuse]

        destrevlog._deltabothparents = (
            forcedeltabothparents or saved_bothparents
        )

        with self.reading():
            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

    finally:
        destrevlog._lazydelta = saved_lazydelta
        destrevlog._lazydeltabase = saved_lazydeltabase
        destrevlog._deltabothparents = saved_bothparents

3225

3226

def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedata_helpers,
):
    """Perform the core duty of `revlog.clone` after parameter processing.

    Every revision of this revlog is copied, in order, into ``destrevlog``.
    With ``DELTAREUSEFULLADD`` the revision goes through
    ``destrevlog.addrevision``; otherwise it goes through
    ``destrevlog._addrevision``, handing over the stored delta when
    ``destrevlog._lazydelta`` is set and the revision has a delta parent.

    ``addrevisioncb``, when given, is called as
    ``addrevisioncb(self, rev, node)`` after each revision is added.
    ``forcedeltabothparents`` is accepted but not used here.
    """
    write_debug = None
    if self._debug_delta:
        write_debug = tr._report
    deltacomputer = deltautil.deltacomputer(
        destrevlog,
        write_debug=write_debug,
    )
    index = self.index
    for rev in self:
        entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use raw entry from index.
        flags = entry[0] & 0xFFFF
        linkrev = entry[4]
        p1 = index[entry[5]][7]
        p2 = index[entry[6]][7]
        node = entry[7]

        # (Possibly) reuse the delta from the revlog if allowed and
        # the revlog chunk is a delta.
        cachedelta = None
        rawtext = None
        if deltareuse == self.DELTAREUSEFULLADD:
            text = self._revisiondata(rev)
            sidedata = self.sidedata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                # ``&`` binds tighter than ``|``: without the parentheses
                # the removal mask only applied to the flags being added
                # and never cleared bits already set on the revision.
                # NOTE(review): assumes new_flags is
                # (flags_to_add, flags_to_remove) -- confirm against
                # sidedatautil.run_sidedata_helpers.
                flags = (flags | new_flags[0]) & ~new_flags[1]

            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            if destrevlog._lazydelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            # Only the raw text is skipped when a delta is reused; the
            # sidedata is needed in both cases.
            if not cachedelta:
                rawtext = self._revisiondata(rev)
            sidedata = self.sidedata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                # Same precedence fix as in the full-add branch above.
                flags = (flags | new_flags[0]) & ~new_flags[1]

            with destrevlog._writing(tr):
                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

        if addrevisioncb:
            addrevisioncb(self, rev, node)

3316

3317

def censorrevision(self, tr, censornode, tombstone=b''):
    """Censor ``censornode`` using the routine for this revlog's format.

    Version 0 revlogs cannot be censored and raise ``error.RevlogError``.
    Version 1 revlogs use ``rewrite.v1_censor``; every other format uses
    ``rewrite.v2_censor``.
    """
    version = self._format_version
    if version == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs') % version
        )

    censor = rewrite.v1_censor if version == REVLOGV1 else rewrite.v2_censor
    censor(self, tr, censornode, tombstone)

3327

3328

def verifyintegrity(self, state):
    """Verify the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.

    ``state`` is read for ``expectedversion``, ``erroroncensored`` and the
    optional ``skipflags``; its ``skipread`` and ``safe_renamed`` entries
    are reset to empty sets, and nodes whose content could not be checked
    are added to ``skipread``.
    """
    data_excess, index_excess = self.checksize()
    if data_excess:
        yield revlogproblem(
            error=_(b'data length off by %d bytes') % data_excess
        )
    if index_excess:
        yield revlogproblem(
            error=_(b'index contains %d extra bytes') % index_excess
        )

    version = self._format_version

    # The verifier tells us what version revlog we should be.
    if version != state[b'expectedversion']:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self.display_id, version, state[b'expectedversion'])
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta: file content starts with b'\1\n', the metadata
        #         header defined in filelog.py, but without a rename
        #   ext: content stored externally
        #
        # More formally, their differences are shown below:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #   flags()             | 0      | 0      | 0     | not 0
        #   renamed()           | False  | True   | False | ?
        #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "rawdata(rev)". "text"
        # mentioned below is "revision(rev)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        #              | common  | rename | meta  | ext
        # -------------------------------------------------
        # rawsize()    | L1      | L1     | L1    | L1
        # size()       | L1      | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2      | L2     | L2    | L2
        # len(text)    | L2      | L2     | L2    | L3
        # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            skipflags = state.get(b'skipflags', 0)
            if skipflags:
                # only keep the skip flags actually set on this revision
                skipflags = skipflags & self.flags(rev)

            _verify_revision(self, skipflags, state, node)

            indexed_size = self.rawsize(rev)
            unpacked_size = len(self.rawdata(node))

            if indexed_size != unpacked_size:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected')
                    % (unpacked_size, indexed_size),
                    node=node,
                )

        except error.CensoredNodeError:
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
                state[b'skipread'].add(node)
        except Exception as exc:
            # Any other failure is reported as a problem on this node
            # rather than aborting the whole verification.
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(exc)),
                node=node,
            )
            state[b'skipread'].add(node)

3432

3433

def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Return a dict holding the requested storage statistics.

    Each truthy argument adds the bytes key of the same name:

    - ``exclusivefiles``: ``(opener, path)`` pairs for the index file and,
      unless the revlog is inline, the data file
    - ``sharedfiles``: always an empty list
    - ``revisionscount``: ``len(self)``
    - ``trackedsize``: sum of ``rawsize`` over all revisions
    - ``storedsize``: sum of the on-disk sizes of ``self.files()``
    """
    info = {}

    if exclusivefiles:
        owned = [(self.opener, self._indexfile)]
        if not self._inline:
            owned.append((self.opener, self._datafile))
        info[b'exclusivefiles'] = owned

    if sharedfiles:
        info[b'sharedfiles'] = []

    if revisionscount:
        info[b'revisionscount'] = len(self)

    if trackedsize:
        info[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

    if storedsize:
        total = 0
        for path in self.files():
            total += self.opener.stat(path).st_size
        info[b'storedsize'] = total

    return info

3463

3464

def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
    """Generate and store sidedata for revisions ``startrev``..``endrev``.

    For every revision in the inclusive range, the sidedata helpers are run,
    the resulting sidedata is serialized (and compressed when that makes it
    smaller), appended to the sidedata file, and the matching index entry
    is then rewritten in place.

    Nothing is done when the revlog has no sidedata support, or when
    ``helpers[1]`` and ``helpers[2]`` are both empty (nothing to generate
    or remove).

    Raises ``error.Abort`` when a revision in the range already carries
    sidedata, since rewriting it is not supported yet.
    """
    if not self.hassidedata:
        return
    # revlog formats with sidedata support do not support inline
    assert not self._inline
    if not helpers[1] and not helpers[2]:
        # Nothing to generate or remove
        return

    new_entries = []
    # append the new sidedata
    with self._writing(transaction):
        ifh, _dfh, sdfh = self._writinghandles
        # The new sidedata goes at the end of the sidedata file, so it is
        # the sidedata handle (not the data handle) that has to be
        # positioned there before its offset is read back.
        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        current_offset = sdfh.tell()
        for rev in range(startrev, endrev + 1):
            entry = self.index[rev]
            new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                store=self,
                sidedata_helpers=helpers,
                sidedata={},
                rev=rev,
            )

            serialized_sidedata = sidedatautil.serialize_sidedata(
                new_sidedata
            )

            sidedata_compression_mode = COMP_MODE_INLINE
            if serialized_sidedata and self.hassidedata:
                sidedata_compression_mode = COMP_MODE_PLAIN
                h, comp_sidedata = self.compress(serialized_sidedata)
                # Slices (not indexes) are used on purpose: indexing a
                # bytes object yields an int on Python 3, which never
                # compares equal to a bytes value.
                if (
                    h != b'u'
                    and comp_sidedata[0:1] != b'\0'
                    and len(comp_sidedata) < len(serialized_sidedata)
                ):
                    assert not h
                    if (
                        comp_sidedata[0:1]
                        == self._docket.default_compression_header
                    ):
                        sidedata_compression_mode = COMP_MODE_DEFAULT
                        serialized_sidedata = comp_sidedata
                    else:
                        sidedata_compression_mode = COMP_MODE_INLINE
                        serialized_sidedata = comp_sidedata
            if entry[8] != 0 or entry[9] != 0:
                # rewriting entries that already have sidedata is not
                # supported yet, because it introduces garbage data in the
                # revlog.
                msg = b"rewriting existing sidedata is not supported yet"
                raise error.Abort(msg)

            # Apply (potential) flags to add and to remove after running
            # the sidedata helpers
            # NOTE(review): `&` binds tighter than `|`, so flags[1] is only
            # masked out of flags[0], never out of entry[0] -- confirm
            # whether `(entry[0] | flags[0]) & ~flags[1]` was intended.
            new_offset_flags = entry[0] | flags[0] & ~flags[1]
            entry_update = (
                current_offset,
                len(serialized_sidedata),
                new_offset_flags,
                sidedata_compression_mode,
            )

            # the sidedata computation might have moved the file cursors
            # around
            sdfh.seek(current_offset, os.SEEK_SET)
            sdfh.write(serialized_sidedata)
            new_entries.append(entry_update)
            current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

        # rewrite the new index entries
        ifh.seek(startrev * self.index.entry_size)
        for i, e in enumerate(new_entries):
            rev = startrev + i
            self.index.replace_sidedata_info(rev, *e)
            packed = self.index.entry_binary(rev)
            if rev == 0 and self._docket is None:
                # without a docket the format header is stored in front
                # of the first index entry
                header = self._format_flags | self._format_version
                header = self.index.pack_header(header)
                packed = header + packed
            ifh.write(packed)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlog.py - storage back-end for mercurial
             # coding: utf8
             #
             # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Storage back-end for Mercurial.
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches.
             """
             import binascii
             import collections
             import contextlib
             import io
             import os
             import struct
             import weakref
             import zlib
             # import stuff from node for others to import from revlog
             from .node import (
                 bin,
                 hex,
                 nullrev,
                 sha1nodeconstants,
                 short,
                 wdirrev,
             )
             from .i18n import _
             from .revlogutils.constants import (
                 ALL_KINDS,
                 CHANGELOGV2,
                 COMP_MODE_DEFAULT,
                 COMP_MODE_INLINE,
                 COMP_MODE_PLAIN,
                 DELTA_BASE_REUSE_NO,
                 DELTA_BASE_REUSE_TRY,
                 ENTRY_RANK,
                 FEATURES_BY_VERSION,
                 FLAG_GENERALDELTA,
                 FLAG_INLINE_DATA,
                 INDEX_HEADER,
                 KIND_CHANGELOG,
                 KIND_FILELOG,
                 RANK_UNKNOWN,
                 REVLOGV0,
                 REVLOGV1,
                 REVLOGV1_FLAGS,
                 REVLOGV2,
                 REVLOGV2_FLAGS,
                 REVLOG_DEFAULT_FLAGS,
                 REVLOG_DEFAULT_FORMAT,
                 REVLOG_DEFAULT_VERSION,
                 SUPPORTED_FLAGS,
             )
             from .revlogutils.flagutil import (
                 REVIDX_DEFAULT_FLAGS,
                 REVIDX_ELLIPSIS,
                 REVIDX_EXTSTORED,
                 REVIDX_FLAGS_ORDER,
                 REVIDX_HASCOPIESINFO,
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from .thirdparty import attr
             from . import (
                 ancestor,
                 dagop,
                 error,
                 mdiff,
                 policy,
                 pycompat,
                 revlogutils,
                 templatefilters,
                 util,
             )
             from .interfaces import (
                 repository,
                 util as interfaceutil,
             )
             from .revlogutils import (
                 deltas as deltautil,
                 docket as docketutil,
                 flagutil,
                 nodemap as nodemaputil,
                 randomaccessfile,
                 revlogv0,
                 rewrite,
                 sidedata as sidedatautil,
             )
             from .utils import (
                 storageutil,
                 stringutil,
             )
             # Bare references to all these names, so pyflakes does not flag them as unused.
             # We need these names available in the module for extensions.
             REVLOGV0
             REVLOGV1
             REVLOGV2
             CHANGELOGV2
             FLAG_INLINE_DATA
             FLAG_GENERALDELTA
             REVLOG_DEFAULT_FLAGS
             REVLOG_DEFAULT_FORMAT
             REVLOG_DEFAULT_VERSION
             REVLOGV1_FLAGS
             REVLOGV2_FLAGS
             REVIDX_ISCENSORED
             REVIDX_ELLIPSIS
             REVIDX_HASCOPIESINFO
             REVIDX_EXTSTORED
             REVIDX_DEFAULT_FLAGS
             REVIDX_FLAGS_ORDER
             REVIDX_RAWTEXT_CHANGING_FLAGS
             parsers = policy.importmod('parsers')
             rustancestor = policy.importrust('ancestor')
             rustdagop = policy.importrust('dagop')
             rustrevlog = policy.importrust('revlog')
             # Aliased for performance.
             _zlibdecompress = zlib.decompress
             # max size of inline data embedded into a revlog
             _maxinline = 131072
             # Flag processors for REVIDX_ELLIPSIS.
             def ellipsisreadprocessor(rl, text):
                 """Read-side flag processor used for REVIDX_ELLIPSIS.

                 ``text`` is handed back untouched, paired with ``False``; ``rl`` is
                 not used.
                 """
                 outcome = (text, False)
                 return outcome
             def ellipsiswriteprocessor(rl, text):
                 """Write-side flag processor used for REVIDX_ELLIPSIS.

                 ``text`` is handed back untouched, paired with ``False``; ``rl`` is
                 not used.
                 """
                 outcome = (text, False)
                 return outcome
             def ellipsisrawprocessor(rl, text):
                 """Raw flag processor used for REVIDX_ELLIPSIS.

                 Always answers ``False``; neither ``rl`` nor ``text`` is inspected.
                 """
                 answer = False
                 return answer
             # (read, write, raw) processor triple; installed for REVIDX_ELLIPSIS on
             # a revlog when the opener's `enableellipsis` option is set.
             ellipsisprocessor = (
                 ellipsisreadprocessor,
                 ellipsiswriteprocessor,
                 ellipsisrawprocessor,
             )
             def _verify_revision(rl, skipflags, state, node):
                 """Verify the integrity of the given revlog ``node`` while providing a hook
                 point for extensions to influence the operation."""
                 if skipflags:
                     state[b'skipread'].add(node)
                 else:
                     # Side-effect: read content and verify hash.
                     rl.revision(node)
             # True if a fast implementation for persistent-nodemap is available
             #
             # We also consider the "pure" python implementation to be "fast", because
             # people using pure don't really have performance considerations (and have
             # a wheelbarrow of other sources of slowness)
             HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
                 parsers, 'BaseIndexObject'
             )
             @interfaceutil.implementer(repository.irevisiondelta)
             @attr.s(slots=True)
             class revlogrevisiondelta:
                 """Attribute container declared as implementing ``irevisiondelta``.

                 Every field must be supplied by the caller except ``linknode``,
                 which defaults to ``None``.
                 """

                 node = attr.ib()
                 p1node = attr.ib()
                 p2node = attr.ib()
                 basenode = attr.ib()
                 flags = attr.ib()
                 baserevisionsize = attr.ib()
                 revision = attr.ib()
                 delta = attr.ib()
                 sidedata = attr.ib()
                 protocol_flags = attr.ib()
                 linknode = attr.ib(default=None)
             @interfaceutil.implementer(repository.iverifyproblem)
             @attr.s(frozen=True)
             class revlogproblem:
                 """Frozen record of one problem, declared as an ``iverifyproblem``.

                 ``warning``, ``error`` and ``node`` all default to ``None``; callers
                 set the ones that apply.
                 """

                 warning = attr.ib(default=None)
                 error = attr.ib(default=None)
                 node = attr.ib(default=None)
             def parse_index_v1(data, inline):
                 """Parse index ``data`` with the parser's default format.

                 Returns the ``(index, cache)`` pair produced by
                 ``parsers.parse_index2``.
                 """
                 # delegate to the implementation provided by the `parsers` module
                 parsed_index, chunk_cache = parsers.parse_index2(data, inline)
                 return parsed_index, chunk_cache
             def parse_index_v2(data, inline):
                 """Parse index ``data`` using the REVLOGV2 format.

                 Returns the ``(index, cache)`` pair produced by
                 ``parsers.parse_index2``.
                 """
                 # delegate to the implementation provided by the `parsers` module
                 parsed_index, chunk_cache = parsers.parse_index2(
                     data, inline, format=REVLOGV2
                 )
                 return parsed_index, chunk_cache
             def parse_index_cl_v2(data, inline):
                 """Parse index ``data`` using the CHANGELOGV2 format.

                 Returns the ``(index, cache)`` pair produced by
                 ``parsers.parse_index2``.
                 """
                 # delegate to the implementation provided by the `parsers` module
                 parsed_index, chunk_cache = parsers.parse_index2(
                     data, inline, format=CHANGELOGV2
                 )
                 return parsed_index, chunk_cache
             # Stays None unless `parsers` exposes the development nodemap parser.
             parse_index_v1_nodemap = None
             if hasattr(parsers, 'parse_index_devel_nodemap'):

                 def parse_index_v1_nodemap(data, inline):
                     """Parse index ``data`` via ``parsers.parse_index_devel_nodemap``.

                     Returns the ``(index, cache)`` pair it produces.
                     """
                     nodemap_index, chunk_cache = parsers.parse_index_devel_nodemap(
                         data, inline
                     )
                     return nodemap_index, chunk_cache
             def parse_index_v1_mixed(data, inline):
                 """Parse index ``data`` as v1 and wrap it in ``rustrevlog.MixedIndex``.

                 Returns ``(mixed_index, cache)``; the cache is the one returned by
                 ``parse_index_v1``.
                 """
                 v1_index, chunk_cache = parse_index_v1(data, inline)
                 mixed_index = rustrevlog.MixedIndex(v1_index)
                 return mixed_index, chunk_cache
             # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
             # signed integer)
             _maxentrysize = 0x7FFFFFFF
             FILE_TOO_SHORT_MSG = _(
                 b'cannot read from revlog %s;'
                 b'  expected %d bytes from offset %d, data size is %d'
             )
             hexdigits = b'0123456789abcdefABCDEF'
             class revlog:
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, including its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 If checkambig, indexfile is opened with checkambig=True at
                 writing, to avoid file stat ambiguity.
                 If mmaplargeindex is True, and an mmapindexthreshold is set, the
                 index will be mmapped rather than read if it is larger than the
                 configured threshold.
                 If censorable is True, the revlog can have censored revisions.
                 If `upperboundcomp` is not None, this is the expected maximal gain from
                 compression for the data content.
                 `concurrencychecker` is an optional function that receives 3 arguments: a
                 file handle, a filename, and an expected position. It should check whether
                 the current position in the file handle is valid, and log/warn/fail (by
                 raising).
                 See mercurial/revlogutils/constants.py for details about the content of an
                 index entry.
                 """
                 _flagserrorclass = error.RevlogError
                 @staticmethod
                 def is_inline_index(header_bytes):
                     """Tell, from the first bytes of an index, whether the revlog is inline.

                     The header is split into its version (low 16 bits) and its format
                     flags (remaining bits); the ``inline`` feature check registered
                     for that version is then applied to the flags.
                     """
                     raw_header = INDEX_HEADER.unpack(header_bytes)[0]
                     version = raw_header & 0xFFFF
                     format_flags = raw_header & ~0xFFFF
                     inline_check = FEATURES_BY_VERSION[version][b'inline']
                     return inline_check(format_flags)
                 def __init__(
                     self,
                     opener,
                     target,
                     radix,
                     postfix=None,  # only exist for `tmpcensored` now
                     checkambig=False,
                     mmaplargeindex=False,
                     censorable=False,
                     upperboundcomp=None,
                     persistentnodemap=False,
                     concurrencychecker=None,
                     trypending=False,
                     try_split=False,
                     canonical_parent_order=True,
                 ):
                     """
                     create a revlog object

                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.

                     `target`: a (KIND, ID) tuple that identifies the content stored in
                     this revlog. It helps the rest of the code understand what the
                     revlog is about without having to resort to heuristics and index
                     filename analysis. Note: this must reliably be set by normal code,
                     but test, debug, or performance measurement code might not set it
                     to an accurate value.

                     The remaining arguments are stored on the instance; the class
                     docstring describes `checkambig`, `mmaplargeindex`, `censorable`,
                     `upperboundcomp` and `concurrencychecker`.

                     The index is loaded (`_loadindex()`) before this method returns.
                     """
                     self.upperboundcomp = upperboundcomp
                     self.radix = radix
                     # File names start unset; only `_nodemap_file` may be filled in
                     # below, by this method.
                     self._docket_file = None
                     self._indexfile = None
                     self._datafile = None
                     self._sidedatafile = None
                     self._nodemap_file = None
                     self.postfix = postfix
                     self._trypending = trypending
                     self._try_split = try_split
                     self.opener = opener
                     if persistentnodemap:
                         # the helper receives the revlog itself, after the
                         # attributes above have been set
                         self._nodemap_file = nodemaputil.get_nodemap_file(self)
                     assert target[0] in ALL_KINDS
                     assert len(target) == 2
                     self.target = target
                     #  When True, indexfile is opened with checkambig=True at writing, to
                     #  avoid file stat ambiguity.
                     self._checkambig = checkambig
                     self._mmaplargeindex = mmaplargeindex
                     self._censorable = censorable
                     # 3-tuple of (node, rev, text) for a raw revision.
                     self._revisioncache = None
                     # Maps rev to chain base rev.
                     self._chainbasecache = util.lrucachedict(100)
                     # 2-tuple of (offset, data) of raw data from the revlog at an offset.
                     self._chunkcache = (0, b'')
                     # How much data to read and cache into the raw revlog data cache.
                     self._chunkcachesize = 65536
                     self._maxchainlen = None
                     self._deltabothparents = True
                     self._candidate_group_chunk_size = 0
                     self._debug_delta = False
                     self.index = None
                     self._docket = None
                     self._nodemap_docket = None
                     # Mapping of partial identifiers to full nodes.
                     self._pcache = {}
                     # Default compression engine and engine options; `_init_opts()`
                     # replaces them when the opener options say so.
                     self._compengine = b'zlib'
                     self._compengineopts = {}
                     self._maxdeltachainspan = -1
                     self._withsparseread = False
                     self._sparserevlog = False
                     self.hassidedata = False
                     self._srdensitythreshold = 0.50
                     self._srmingapsize = 262144
                     # other optional features
                     # might remove rank configuration once the computation has no impact
                     self._compute_rank = False
                     # Make copy of flag processors so each revlog instance can support
                     # custom flags.
                     self._flagprocessors = dict(flagutil.flagprocessors)
                     # 3-tuple of file handles being used for active writing.
                     self._writinghandles = None
                     # prevent nesting of addgroup
                     self._adding_group = None
                     # Runs after every default above has been set.
                     self._loadindex()
                     self._concurrencychecker = concurrencychecker
                     # parent order is supposed to be semantically irrelevant, so we
                     # normally resort parents to ensure that the first parent is non-null,
                     # if there is a non-null parent at all.
                     # filelog abuses the parent order as flag to mark some instances of
                     # meta-encoded files, so allow it to disable this behavior.
                     self.canonical_parent_order = canonical_parent_order
                 def _init_opts(self):
                     """Apply the opener options to this revlog's default settings.

                     The values set here may still be adjusted once the on-disk
                     information is actually read. Three values are returned for use in
                     _loadindex():

                     * the version header to use if a new revlog has to be created
                     * the minimal index size from which mmap is used (or None)
                     * whether to force the "development" version of the nodemap code

                     Raises error.RevlogError when the chunk cache size is not a
                     positive power of 2.
                     """
                     opts = self.opener.options
                     mmapindexthreshold = None

                     # header for a brand new revlog
                     if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
                         new_header = CHANGELOGV2
                         self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
                     elif b'revlogv2' in opts:
                         new_header = REVLOGV2
                     elif b'revlogv1' in opts:
                         new_header = REVLOGV1 | FLAG_INLINE_DATA
                         if b'generaldelta' in opts:
                             new_header |= FLAG_GENERALDELTA
                     elif b'revlogv0' in opts:
                         new_header = REVLOGV0
                     else:
                         new_header = REVLOG_DEFAULT_VERSION

                     # options copied as-is onto an attribute when they are present
                     copied_options = (
                         (b'chunkcachesize', '_chunkcachesize'),
                         (b'maxchainlen', '_maxchainlen'),
                         (b'deltabothparents', '_deltabothparents'),
                         (b'debug-delta', '_debug_delta'),
                         (b'compengine', '_compengine'),
                         (b'maxdeltachainspan', '_maxdeltachainspan'),
                         (b'sparse-read-density-threshold', '_srdensitythreshold'),
                         (b'sparse-read-min-gap-size', '_srmingapsize'),
                     )
                     for opt_key, attr_name in copied_options:
                         if opt_key in opts:
                             setattr(self, attr_name, opts[opt_key])

                     # compression levels are forwarded to the engine options
                     for level_key in (b'zlib.level', b'zstd.level'):
                         if level_key in opts:
                             self._compengineopts[level_key] = opts[level_key]

                     group_chunk_size = opts.get(
                         b'delta-parent-search.candidate-group-chunk-size'
                     )
                     if group_chunk_size:
                         self._candidate_group_chunk_size = group_chunk_size

                     # lazydeltabase is only considered when lazydelta is enabled
                     self._lazydelta = bool(opts.get(b'lazydelta', True))
                     self._lazydeltabase = self._lazydelta and bool(
                         opts.get(b'lazydeltabase', False)
                     )

                     if self._mmaplargeindex and b'mmapindexthreshold' in opts:
                         mmapindexthreshold = opts[b'mmapindexthreshold']

                     self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
                     sparse_read_requested = bool(opts.get(b'with-sparse-read', False))
                     # sparse-revlog forces sparse-read
                     self._withsparseread = self._sparserevlog or sparse_read_requested

                     if opts.get(b'enableellipsis'):
                         self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

                     # revlog v0 doesn't have flag processors
                     extra_processors = opts.get(b'flagprocessors', {})
                     for flag, processor in extra_processors.items():
                         flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

                     cache_size = self._chunkcachesize
                     if cache_size <= 0:
                         raise error.RevlogError(
                             _(b'revlog chunk cache size %r is not greater than 0')
                             % cache_size
                         )
                     if cache_size & (cache_size - 1):
                         raise error.RevlogError(
                             _(b'revlog chunk cache size %r is not a power of 2')
                             % cache_size
                         )

                     force_nodemap = opts.get(b'devel-force-nodemap', False)
                     return new_header, mmapindexthreshold, force_nodemap
                 def _get_data(self, filepath, mmap_threshold, size=None):
                     """Return the content of ``filepath``, mmapped or read normally.

                     The file is mmapped when ``mmap_threshold`` is not None and the
                     file is at least that large; otherwise it is read. ``size``, when
                     given, limits how much is returned.

                     A missing file yields the empty byte string.
                     """
                     try:
                         with self.opener(filepath) as fp:
                             use_mmap = False
                             if mmap_threshold is not None:
                                 on_disk_size = self.opener.fstat(fp).st_size
                                 use_mmap = on_disk_size >= mmap_threshold
                                 if use_mmap and size is not None:
                                     # avoid potential mmap crash
                                     size = min(on_disk_size, size)

                             if use_mmap:
                                 # TODO: should .close() to release resources without
                                 # relying on Python GC
                                 if size is None:
                                     mapped = util.mmapread(fp)
                                 else:
                                     mapped = util.mmapread(fp, size)
                                 return util.buffer(mapped)

                             if size is None:
                                 return fp.read()
                             return fp.read(size)
                     except FileNotFoundError:
                         return b''
                 def get_streams(self, max_linkrev, force_inline=False):
                     """return a list of streams that represent this revlog
                     This is used by stream-clone to do bytes to bytes copies of a repository.
                     This streams data for all revisions that refer to a changelog revision up
                     to `max_linkrev`.
                     If `force_inline` is set, it enforces that the stream will represent an inline revlog.
                     It returns is a list of three-tuple:
                         [
                             (filename, bytes_stream, stream_size),
                             …
                         ]
                     """
                     n = len(self)
                     index = self.index
                     while n > 0:
                         linkrev = index[n - 1][4]
                         if linkrev < max_linkrev:
                             break
                         # note: this loop will rarely go through multiple iterations, since
                         # it only traverses commits created during the current streaming
                         # pull operation.
                         #
                         # If this become a problem, using a binary search should cap the
                         # runtime of this.
                         n = n - 1
                     if n == 0:
                         # no data to send
                         return []
                     index_size = n * index.entry_size
                     data_size = self.end(n - 1)
                     # XXX we might have been split (or stripped) since the object
                     # initialization, We need to close this race too, but having a way to
                     # pre-open the file we feed to the revlog and never closing them before
                     # we are done streaming.
                     if self._inline:
                         def get_stream():
                             with self._indexfp() as fp:
                                 yield None
                                 size = index_size + data_size
                                 if size <= 65536:
                                     yield fp.read(size)
                                 else:
                                     yield from util.filechunkiter(fp, limit=size)
                         inline_stream = get_stream()
                         next(inline_stream)
                         return [
                             (self._indexfile, inline_stream, index_size + data_size),
                         ]
                     elif force_inline:
                         def get_stream():
                             with self.reading():
                                 yield None
                                 for rev in range(n):
                                     idx = self.index.entry_binary(rev)
                                     if rev == 0 and self._docket is None:
                                         # re-inject the inline flag
                                         header = self._format_flags
                                         header |= self._format_version
                                         header |= FLAG_INLINE_DATA
                                         header = self.index.pack_header(header)
                                         idx = header + idx
                                     yield idx
                                     yield self._getsegmentforrevs(rev, rev)[1]
                         inline_stream = get_stream()
                         next(inline_stream)
                         return [
                             (self._indexfile, inline_stream, index_size + data_size),
                         ]
                     else:
                         def get_index_stream():
                             with self._indexfp() as fp:
                                 yield None
                                 if index_size <= 65536:
                                     yield fp.read(index_size)
                                 else:
                                     yield from util.filechunkiter(fp, limit=index_size)
                         def get_data_stream():
                             with self._datafp() as fp:
                                 yield None
                                 if data_size <= 65536:
                                     yield fp.read(data_size)
                                 else:
                                     yield from util.filechunkiter(fp, limit=data_size)
                         index_stream = get_index_stream()
                         next(index_stream)
                         data_stream = get_data_stream()
                         next(data_stream)
                         return [
                             (self._datafile, data_stream, data_size),
                             (self._indexfile, index_stream, index_size),
                         ]
                 def _loadindex(self, docket=None):
                     """Load the index of this revlog and initialise the state derived from it.

                     The method selects the entry-point file, decodes the format header
                     (unless ``docket`` is supplied), derives the names of the files used
                     by this revlog, picks an index parser, parses the index data and
                     finally assigns ``self.index`` together with the segment-file
                     readers and a couple of caches.

                     Raises ``error.RevlogError`` when the format version or flags are not
                     supported, when a docket-described index is shorter than the docket
                     says, or when parsing the index raises ``ValueError``/``IndexError``.

                     NOTE(review): when ``docket`` is passed in, the header is not decoded
                     here, so ``_format_version`` and ``_format_flags`` are presumably set
                     by the caller beforehand -- confirm.
                     """
                     new_header, mmapindexthreshold, force_nodemap = self._init_opts()
                     # Choose the entry-point file: an explicit postfix wins, then an
                     # existing pending file (".i.a"), then an existing split index
                     # file, and finally the regular ".i" file.
                     if self.postfix is not None:
                         entry_point = b'%s.i.%s' % (self.radix, self.postfix)
                     elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
                         entry_point = b'%s.i.a' % self.radix
                     elif self._try_split and self.opener.exists(self._split_index_file):
                         entry_point = self._split_index_file
                     else:
                         entry_point = b'%s.i' % self.radix
                     # A docket handed in by the caller is used as is; otherwise the
                     # entry point is read and its header decoded.
                     if docket is not None:
                         self._docket = docket
                         self._docket_file = entry_point
                     else:
                         # considered empty until some entry data is found
                         self._initempty = True
                         entry_data = self._get_data(entry_point, mmapindexthreshold)
                         if len(entry_data) > 0:
                             # the header is unpacked from the first 4 bytes
                             header = INDEX_HEADER.unpack(entry_data[:4])[0]
                             self._initempty = False
                         else:
                             header = new_header
                         # low 16 bits: format version; remaining bits: format flags
                         self._format_flags = header & ~0xFFFF
                         self._format_version = header & 0xFFFF
                         supported_flags = SUPPORTED_FLAGS.get(self._format_version)
                         if supported_flags is None:
                             msg = _(b'unknown version (%d) in revlog %s')
                             msg %= (self._format_version, self.display_id)
                             raise error.RevlogError(msg)
                         elif self._format_flags & ~supported_flags:
                             msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                             # shift the version bits away so only flag bits are shown
                             display_flag = self._format_flags >> 16
                             msg %= (display_flag, self._format_version, self.display_id)
                             raise error.RevlogError(msg)
                         features = FEATURES_BY_VERSION[self._format_version]
                         self._inline = features[b'inline'](self._format_flags)
                         self._generaldelta = features[b'generaldelta'](self._format_flags)
                         self.hassidedata = features[b'sidedata']
                         if not features[b'docket']:
                             # no docket: the entry point is the index file itself
                             self._indexfile = entry_point
                             index_data = entry_data
                         else:
                             # the entry point is a docket: create a default one for
                             # an empty revlog, parse the existing data otherwise
                             self._docket_file = entry_point
                             if self._initempty:
                                 self._docket = docketutil.default_docket(self, header)
                             else:
                                 self._docket = docketutil.parse_docket(
                                     self, entry_data, use_pending=self._trypending
                                 )
                     if self._docket is not None:
                         # with a docket, the index is read from the file the docket
                         # designates, limited to the size the docket records
                         self._indexfile = self._docket.index_filepath()
                         index_data = b''
                         index_size = self._docket.index_end
                         if index_size > 0:
                             index_data = self._get_data(
                                 self._indexfile, mmapindexthreshold, size=index_size
                             )
                             if len(index_data) < index_size:
                                 msg = _(b'too few index data for %s: got %d, expected %d')
                                 msg %= (self.display_id, len(index_data), index_size)
                                 raise error.RevlogError(msg)
                         self._inline = False
                         # generaldelta implied by version 2 revlogs.
                         self._generaldelta = True
                         # the logic for persistent nodemap will be dealt with within the
                         # main docket, so disable it for now.
                         self._nodemap_file = None
                     # Data (and, with a docket, sidedata) file names.
                     # NOTE(review): `_sidedatafile` is only assigned here in the docket
                     # case; it is presumably initialised elsewhere otherwise -- confirm.
                     if self._docket is not None:
                         self._datafile = self._docket.data_filepath()
                         self._sidedatafile = self._docket.sidedata_filepath()
                     elif self.postfix is None:
                         self._datafile = b'%s.d' % self.radix
                     else:
                         self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
                     self.nodeconstants = sha1nodeconstants
                     self.nullid = self.nodeconstants.nullid
                     # sparse-revlog can't be on without general-delta (issue6056)
                     if not self._generaldelta:
                         self._sparserevlog = False
                     self._storedeltachains = True
                     devel_nodemap = (
                         self._nodemap_file
                         and force_nodemap
                         and parse_index_v1_nodemap is not None
                     )
                     use_rust_index = False
                     if rustrevlog is not None:
                         if self._nodemap_file is not None:
                             use_rust_index = True
                         else:
                             use_rust_index = self.opener.options.get(b'rust.index')
                     # Default parser; replaced below depending on the format version
                     # and on the nodemap / rust options computed above.
                     self._parse_index = parse_index_v1
                     if self._format_version == REVLOGV0:
                         self._parse_index = revlogv0.parse_index_v0
                     elif self._format_version == REVLOGV2:
                         self._parse_index = parse_index_v2
                     elif self._format_version == CHANGELOGV2:
                         self._parse_index = parse_index_cl_v2
                     elif devel_nodemap:
                         self._parse_index = parse_index_v1_nodemap
                     elif use_rust_index:
                         self._parse_index = parse_index_v1_mixed
                     # Any ValueError/IndexError raised while parsing the index or while
                     # validating the nodemap docket is reported as index corruption.
                     try:
                         d = self._parse_index(index_data, self._inline)
                         index, chunkcache = d
                         use_nodemap = (
                             not self._inline
                             and self._nodemap_file is not None
                             and hasattr(index, 'update_nodemap_data')
                         )
                         if use_nodemap:
                             nodemap_data = nodemaputil.persisted_data(self)
                             if nodemap_data is not None:
                                 docket = nodemap_data[0]
                                 # only trust the persisted nodemap when its tip
                                 # revision exists and carries the recorded node
                                 if (
                                     len(d[0]) > docket.tip_rev
                                     and d[0][docket.tip_rev][7] == docket.tip_node
                                 ):
                                     # no changelog tampering
                                     self._nodemap_docket = docket
                                     index.update_nodemap_data(*nodemap_data)
                     except (ValueError, IndexError):
                         raise error.RevlogError(
                             _(b"index %s is corrupted") % self.display_id
                         )
                     self.index = index
                     # revision data is read from the index file for inline revlogs and
                     # from the data file otherwise
                     self._segmentfile = randomaccessfile.randomaccessfile(
                         self.opener,
                         (self._indexfile if self._inline else self._datafile),
                         self._chunkcachesize,
                         chunkcache,
                     )
                     self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
                         self.opener,
                         self._sidedatafile,
                         self._chunkcachesize,
                     )
                     # revnum -> (chain-length, sum-delta-length)
                     self._chaininfocache = util.lrucachedict(500)
                     # revlog header -> revlog compressor
                     self._decompressors = {}
                 def get_revlog(self):
                     """Return this object itself.

                     Provided so a revlog offers the same ``get_revlog()`` accessor as
                     other objects that are not really revlogs.
                     """
                     this_revlog = self
                     return this_revlog
                 @util.propertycache
                 def revlog_kind(self):
                     """Kind of this revlog: the first item of ``self.target``."""
                     kind = self.target[0]
                     return kind
                 @util.propertycache
                 def display_id(self):
                     """Identifier of this revlog as shown in user-facing messages."""
                     if self.revlog_kind != KIND_FILELOG:
                         return self.radix
                     # For a filelog, show the file name without the "data/" prefix:
                     # that is the form the user is familiar with.
                     return self.target[1]
                 def _get_decompressor(self, t):
                     try:
                         compressor = self._decompressors[t]
                     except KeyError:
                         try:
                             engine = util.compengines.forrevlogheader(t)
                             compressor = engine.revlogcompressor(self._compengineopts)
                             self._decompressors[t] = compressor
                         except KeyError:
                             raise error.RevlogError(
                                 _(b'unknown compression type %s') % binascii.hexlify(t)
                             )
                     return compressor
                 @util.propertycache
                 def _compressor(self):
                     """Revlog compressor of the engine named by ``self._compengine``."""
                     return util.compengines[self._compengine].revlogcompressor(
                         self._compengineopts
                     )
                 @util.propertycache
                 def _decompressor(self):
                     """The default decompress function, or None when there is no docket.

                     It is the ``decompress`` method of the compressor matching the
                     docket's default compression header.
                     """
                     docket = self._docket
                     if docket is None:
                         return None
                     default_header = docket.default_compression_header
                     return self._get_decompressor(default_header).decompress
                 def _indexfp(self):
                     """file object for the revlog's index file"""
                     return self.opener(self._indexfile, mode=b"r")
                 def __index_write_fp(self):
                     # You should not use this directly and use `_writing` instead
                     try:
                         f = self.opener(
                             self._indexfile, mode=b"r+", checkambig=self._checkambig
                         )
                         if self._docket is None:
                             f.seek(0, os.SEEK_END)
                         else:
                             f.seek(self._docket.index_end, os.SEEK_SET)
                         return f
                     except FileNotFoundError:
                         return self.opener(
                             self._indexfile, mode=b"w+", checkambig=self._checkambig
                         )
                 def __index_new_fp(self):
                     # You should not use this unless you are upgrading from inline revlog
                     return self.opener(
                         self._indexfile,
                         mode=b"w",
                         checkambig=self._checkambig,
                         atomictemp=True,
                     )
                 def _datafp(self, mode=b'r'):
                     """file object for the revlog's data file"""
                     return self.opener(self._datafile, mode=mode)
                 @contextlib.contextmanager
                 def _sidedatareadfp(self):
                     """file object suitable to read sidedata"""
                     if self._writinghandles:
                         yield self._writinghandles[2]
                     else:
                         with self.opener(self._sidedatafile) as fp:
                             yield fp
                 def tiprev(self):
                     """Return the highest revision number: the index length minus one."""
                     entry_count = len(self.index)
                     return entry_count - 1
                 def tip(self):
                     """Return the node of the revision reported by ``tiprev()``."""
                     last_rev = self.tiprev()
                     return self.node(last_rev)
                 def __contains__(self, rev):
                     return 0 <= rev < len(self)
                 def __len__(self):
                     return len(self.index)
                 def __iter__(self):
                     return iter(range(len(self)))
                 def revs(self, start=0, stop=None):
                     """Iterate over the revisions of this revlog, from ``start`` to ``stop``.

                     The iteration is delegated to ``storageutil.iterrevs`` with the
                     current length of the revlog.
                     """
                     total = len(self)
                     return storageutil.iterrevs(total, start=start, stop=stop)
                 def hasnode(self, node):
                     """Return True when ``self.rev(node)`` succeeds, False on ``KeyError``."""
                     try:
                         self.rev(node)
                     except KeyError:
                         return False
                     else:
                         return True
                 def _candelta(self, baserev, rev):
                     """Tell whether a delta between ``baserev`` and ``rev`` is allowed.

                     Returns False as soon as one of the two revisions carries a flag
                     from ``REVIDX_RAWTEXT_CHANGING_FLAGS``, True otherwise.
                     """
                     # A content-changing flag processor (ex. LFS) can alter the rawtext
                     # a delta would be based on. Two clients could then hold the same
                     # revlog node with different flags (i.e. different rawtext) and the
                     # delta could be incompatible, so such revisions are never delta-ed.
                     blocking_flags = REVIDX_RAWTEXT_CHANGING_FLAGS
                     for candidate in (baserev, rev):
                         if self.flags(candidate) & blocking_flags:
                             return False
                     return True
                 def update_caches(self, transaction):
                     """Update the on-disk cache (the persistent nodemap, when one is used).

                     Without a transaction the nodemap is updated right away. With one,
                     the update is set up through the transaction and may therefore be
                     delayed to transaction commit.
                     """
                     if self._nodemap_file is None:
                         return
                     if transaction is not None:
                         nodemaputil.setup_persistent_nodemap(transaction, self)
                     else:
                         nodemaputil.update_persistent_nodemap(self)
                 def clearcaches(self):
                     """Drop the in-memory caches, then reload persisted nodemap data if used."""
                     self._revisioncache = None
                     self._chainbasecache.clear()
                     self._segmentfile.clear_cache()
                     self._segmentfile_sidedata.clear_cache()
                     self._pcache = {}
                     self._nodemap_docket = None
                     self.index.clearcaches()
                     # The python code is the one responsible for validating the docket,
                     # so it has to be refreshed here once the index caches are gone.
                     if self._inline or self._nodemap_file is None:
                         return
                     if not hasattr(self.index, 'update_nodemap_data'):
                         return
                     persisted = nodemaputil.persisted_data(self)
                     if persisted is None:
                         return
                     self._nodemap_docket = persisted[0]
                     self.index.update_nodemap_data(*persisted)
                 def rev(self, node):
                     """Return the revision number associated with a <nodeid>.

                     A ``TypeError`` from the index lookup is propagated unchanged. An
                     ``error.RevlogError`` becomes ``error.WdirUnsupported`` for the
                     working-directory ids and ``error.LookupError`` for any other node.
                     """
                     try:
                         return self.index.rev(node)
                     except TypeError:
                         raise
                     except error.RevlogError:
                         # parsers.c radix tree lookup failed
                         constants = self.nodeconstants
                         is_wdir = (
                             node == constants.wdirid
                             or node in constants.wdirfilenodeids
                         )
                         if is_wdir:
                             raise error.WdirUnsupported
                         raise error.LookupError(node, self.display_id, _(b'no node'))
                 # Accessors for index entries.
                 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
                 # are flags.
                 def start(self, rev):
                     """Return the offset of ``rev``: index field 0 without its low 16 bits."""
                     offset_and_flags = self.index[rev][0]
                     return int(offset_and_flags >> 16)
                 def sidedata_cut_off(self, rev):
                     """Return the sidedata offset to use for ``rev``.

                     This is the offset recorded for ``rev`` (index field 8) when it is
                     not zero. Otherwise it is the end (field 8 + field 9) of the closest
                     revision, walking down from ``rev``, whose field 9 is not zero, or 0
                     when there is none.
                     """
                     index = self.index
                     recorded = index[rev][8]
                     if recorded != 0:
                         return recorded
                     # Entries without sidedata currently store 0 as their offset
                     # (instead of previous-offset + previous-size), hence this
                     # backward walk.
                     #
                     # The "no sidedata -> offset 0" policy should be reconsidered; in
                     # the meantime, this is needed.
                     for earlier in range(rev, -1, -1):
                         entry = index[earlier]
                         if entry[9] != 0:
                             return entry[8] + entry[9]
                     return 0
                 def flags(self, rev):
                     """Return the flags of ``rev``: the low 16 bits of index field 0."""
                     offset_and_flags = self.index[rev][0]
                     return offset_and_flags & 0xFFFF
                 def length(self, rev):
                     """Return the length recorded for ``rev`` in the index (field 1)."""
                     entry = self.index[rev]
                     return entry[1]
                 def sidedata_length(self, rev):
                     """Return index field 9 of ``rev``, or 0 when the revlog has no sidedata."""
                     if self.hassidedata:
                         return self.index[rev][9]
                     return 0
                 def rawsize(self, rev):
                     """Return the length of the uncompressed text for a given revision.

                     The size stored in the index (field 2) is used when it is not
                     negative. Otherwise the raw data is fetched and measured.
                     """
                     stored = self.index[rev][2]
                     if stored < 0:
                         return len(self.rawdata(rev))
                     return stored
                 def size(self, rev):
                     """Return the length of the non-raw text (processed by "read" flag processors)."""
                     # Fast path: when none of the known flags other than ELLIPSIS is
                     # set, no "read" flag processor can change the content, so the size
                     # is the raw size. ELLIPSIS is known to not change the content.
                     content_changing = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
                     if (self.flags(rev) & content_changing) != 0:
                         return len(self.revision(rev))
                     return self.rawsize(rev)
                 def fast_rank(self, rev):
                     """Return the rank of a revision if it is already known, None otherwise.

                     The rank of a revision is the size of the sub-graph it defines as a
                     head, that is the size of the set ``ancestors(r)``, ``r`` included.

                     The value is only read from the index, in constant time. Nothing is
                     computed for revlog versions that do not persist the rank.
                     """
                     stored_rank = self.index[rev][ENTRY_RANK]
                     if self._format_version != CHANGELOGV2:
                         return None
                     if stored_rank == RANK_UNKNOWN:
                         return None
                     # by convention, the null revision has rank 0
                     return 0 if rev == nullrev else stored_rank
                 def chainbase(self, rev):
                     """Return the base revision of the delta chain ``rev`` belongs to.

                     Index field 3 is followed from ``rev`` until an entry designates
                     itself. The answer is memoised in ``self._chainbasecache``.
                     """
                     cache = self._chainbasecache
                     cached = cache.get(rev)
                     if cached is not None:
                         return cached
                     index = self.index
                     current = rev
                     while True:
                         base = index[current][3]
                         if base == current:
                             break
                         current = base
                     cache[rev] = base
                     return base
                 def linkrev(self, rev):
                     """Return the linkrev of ``rev`` (index field 4)."""
                     entry = self.index[rev]
                     return entry[4]
                 def parentrevs(self, rev):
                     """Return the parent revisions of ``rev`` as a ``(p1, p2)`` tuple.

                     With ``canonical_parent_order`` set, the parents are swapped when
                     the first one is ``nullrev``. An out-of-range ``rev`` raises
                     ``error.WdirUnsupported`` for ``wdirrev`` and ``IndexError``
                     otherwise.
                     """
                     try:
                         entry = self.index[rev]
                     except IndexError:
                         if rev == wdirrev:
                             raise error.WdirUnsupported
                         raise
                     first, second = entry[5], entry[6]
                     if self.canonical_parent_order and first == nullrev:
                         return second, first
                     return first, second
                 # fast parentrevs(rev) where rev isn't filtered: at this level it is a
                 # plain alias of `parentrevs`
                 _uncheckedparentrevs = parentrevs
                 def node(self, rev):
                     """Return the node id of ``rev`` (index field 7).

                     An ``IndexError`` is turned into ``error.WdirUnsupported`` when
                     ``rev`` is ``wdirrev`` and re-raised unchanged otherwise.
                     """
                     try:
                         entry = self.index[rev]
                         return entry[7]
                     except IndexError:
                         if rev == wdirrev:
                             raise error.WdirUnsupported
                         raise
                 # Derived from index values.
                 def end(self, rev):
                     """Return the end offset of ``rev``: its start plus its length."""
                     begin = self.start(rev)
                     return begin + self.length(rev)
                 def parents(self, node):
                     """Return the node ids of the two parents of ``node`` as ``(p1, p2)``.

                     With ``canonical_parent_order`` set, the parents are swapped when
                     the first parent is the null revision, exactly as ``parentrevs``
                     does for revision numbers.

                     Lookup failures of ``node`` are raised by ``self.rev``.
                     """
                     index = self.index
                     entry = index[self.rev(node)]
                     p1rev, p2rev = entry[5], entry[6]
                     # Fields 5 and 6 hold parent *revision numbers* (they are used to
                     # index `index` just below), so "no first parent" must be tested
                     # against `nullrev`. Comparing with the `nullid` node could never
                     # match and silently disabled the canonical ordering.
                     if self.canonical_parent_order and p1rev == nullrev:
                         p1rev, p2rev = p2rev, p1rev
                     # inline node() to avoid function call overhead
                     return index[p1rev][7], index[p2rev][7]
                 def chainlen(self, rev):
                     """Return the first item of ``_chaininfo(rev)``: the delta chain length."""
                     info = self._chaininfo(rev)
                     return info[0]
                 def _chaininfo(self, rev):
                     chaininfocache = self._chaininfocache
                     if rev in chaininfocache:
                         return chaininfocache[rev]
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     clen = 0
                     compresseddeltalen = 0
                     while iterrev != e[3]:
                         clen += 1
                         compresseddeltalen += e[1]
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         if iterrev in chaininfocache:
                             t = chaininfocache[iterrev]
                             clen += t[0]
                             compresseddeltalen += t[1]
                             break
                         e = index[iterrev]
                     else:
                         # Add text length of base since decompressing that also takes
                         # work. For cache hits the length is already included.
                         compresseddeltalen += e[1]
                     r = (clen, compresseddeltalen)
                     chaininfocache[rev] = r
                     return r
                 def _deltachain(self, rev, stoprev=None):
                     """Obtain the delta chain for a revision.
                     ``stoprev`` specifies a revision to stop at. If not specified, we
                     stop at the base of the chain.
                     Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
                     revs in ascending order and ``stopped`` is a bool indicating whether
                     ``stoprev`` was hit.
                     """
                     # Try C implementation.
                     try:
                         return self.index.deltachain(rev, stoprev, self._generaldelta)
                     except AttributeError:
                         pass
                     chain = []
                     # Alias to prevent attribute lookup in tight loop.
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     while iterrev != e[3] and iterrev != stoprev:
                         chain.append(iterrev)
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         e = index[iterrev]
                     if iterrev == stoprev:
                         stopped = True
                     else:
                         chain.append(iterrev)
                         stopped = False
                     chain.reverse()
                     return chain, stopped
                 def ancestors(self, revs, stoprev=0, inclusive=False):
                     """Generate the ancestors of 'revs' in reverse revision order.

                     Revisions lower than stoprev are not generated.

                     See the documentation for ancestor.lazyancestors for more details."""
                     # first, make sure start revisions aren't filtered
                     revs = list(revs)
                     for start_rev in revs:
                         self.node(start_rev)
                     # and we're sure ancestors aren't filtered as well
                     if rustancestor is None or not self.index.rust_ext_compat:
                         factory = ancestor.lazyancestors
                         parent_source = self._uncheckedparentrevs
                     else:
                         factory = rustancestor.LazyAncestors
                         parent_source = self.index
                     return factory(
                         parent_source, revs, stoprev=stoprev, inclusive=inclusive
                     )
                 def descendants(self, revs):
                     """Return the result of ``dagop.descendantrevs`` for ``revs``.

                     The helper receives ``self.revs`` and ``self.parentrevs`` to walk
                     this revlog.
                     """
                     iter_revs = self.revs
                     parents_of = self.parentrevs
                     return dagop.descendantrevs(revs, iter_revs, parents_of)
                 def findcommonmissing(self, common=None, heads=None):
                     """Return the ancestors of common, and the ancestors of heads that
                     are not ancestors of common.

                     In revset terminology, the returned tuple is:

                       ::common, (::heads) - (::common)

                     The first item is a lazy set-like object of revision numbers. The
                     second is a list of nodes sorted by revision number, meaning it is
                     topologically sorted.

                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [self.nullid]
                     if heads is None:
                         heads = self.heads()
                     common = [self.rev(n) for n in common]
                     heads = [self.rev(n) for n in heads]

                     # we want the ancestors, but inclusive
                     class lazyset:
                         """Union of a lazy iterable and explicitly added values."""

                         def __init__(self, lazyvalues):
                             self.addedvalues = set()
                             self.lazyvalues = lazyvalues

                         def __contains__(self, value):
                             if value in self.addedvalues:
                                 return True
                             return value in self.lazyvalues

                         def __iter__(self):
                             added = self.addedvalues
                             yield from added
                             for candidate in self.lazyvalues:
                                 if candidate not in added:
                                     yield candidate

                         def add(self, value):
                             self.addedvalues.add(value)

                         def update(self, values):
                             self.addedvalues.update(values)

                     has = lazyset(self.ancestors(common))
                     has.add(nullrev)
                     has.update(common)
                     # take all ancestors from heads that aren't in has
                     missing = set()
                     pending = collections.deque(r for r in heads if r not in has)
                     while pending:
                         current = pending.popleft()
                         if current in missing:
                             continue
                         missing.add(current)
                         for parent in self.parentrevs(current):
                             if parent not in has:
                                 pending.append(parent)
                     return has, [self.node(miss) for miss in sorted(missing)]
                 def incrementalmissingrevs(self, common=None):
                     """Return an object able to incrementally compute the revision
                     numbers of the ancestors of arbitrary sets that are not ancestors of
                     common.

                     The object is an ancestor.incrementalmissingancestors instance, or
                     the rustancestor.MissingAncestors equivalent when the rust
                     extension is available and compatible with the index.

                     'common' is a list of revision numbers. If common is not supplied,
                     uses nullrev.
                     """
                     roots = common if common is not None else [nullrev]
                     if rustancestor is None or not self.index.rust_ext_compat:
                         return ancestor.incrementalmissingancestors(
                             self.parentrevs, roots
                         )
                     return rustancestor.MissingAncestors(self.index, roots)
                 def findmissingrevs(self, common=None, heads=None):
                     """Return revisions reachable from ``heads`` but not from ``common``.

                     The result is the list of revision numbers N such that:

                       1. N is an ancestor of some revision in 'heads'
                       2. N is not an ancestor of any revision in 'common'

                     The computation is delegated to the object returned by
                     ``incrementalmissingrevs()``.

                     'heads' and 'common' are both lists of revision numbers. When
                     'heads' is omitted, ``self.headrevs()`` is used; when 'common' is
                     omitted, ``[nullrev]`` is used.
                     """
                     bases = [nullrev] if common is None else common
                     tips = self.headrevs() if heads is None else heads
                     return self.incrementalmissingrevs(common=bases).missingancestors(tips)
                 def findmissing(self, common=None, heads=None):
                     """Return nodes reachable from ``heads`` but not from ``common``.

                     The result is the list of nodes N such that:

                       1. N is an ancestor of some node in 'heads'
                       2. N is not an ancestor of any node in 'common'

                     Nodes are emitted in the order produced by the incremental
                     missing-ancestors object for the corresponding revisions.

                     'heads' and 'common' are both lists of node IDs. When 'heads' is
                     omitted, ``self.heads()`` is used; when 'common' is omitted,
                     ``[self.nullid]`` is used.
                     """
                     commonnodes = [self.nullid] if common is None else common
                     headnodes = self.heads() if heads is None else heads
                     # Work on revision numbers internally, convert back at the end.
                     commonrevs = list(map(self.rev, commonnodes))
                     headrevs = list(map(self.rev, headnodes))
                     tracker = self.incrementalmissingrevs(common=commonrevs)
                     missingrevs = tracker.missingancestors(headrevs)
                     return [self.node(r) for r in missingrevs]
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a topological path from 'roots' to 'heads'.

                     Return a tuple (nodes, outroots, outheads) where 'nodes' is a
                     topologically sorted list of all nodes N that satisfy both of
                     these constraints:

                       1. N is a descendant of some node in 'roots'
                       2. N is an ancestor of some node in 'heads'

                     Every node is considered to be both a descendant and an ancestor
                     of itself, so every reachable node in 'roots' and 'heads' will be
                     included in 'nodes'.

                     'outroots' is the list of reachable nodes in 'roots', i.e., the
                     subset of 'roots' that is returned in 'nodes'.  Likewise,
                     'outheads' is the subset of 'heads' that is also in 'nodes'.

                     'roots' and 'heads' are both lists of node IDs.  If 'roots' is
                     unspecified, uses nullid as the only root.  If 'heads' is
                     unspecified, uses list of all of the revlog's heads.

                     If 'roots' or 'heads' is supplied but empty, or if nothing
                     connects them, a tuple of three empty lists is returned."""
                     # Shared "nothing found" result.
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [self.nullid]  # Everybody's a descendant of nullid
                         lowestrev = nullrev
                     if (lowestrev == nullrev) and (heads is None):
                         # We want _all_ the nodes!
                         return (
                             [self.node(r) for r in self],
                             [self.nullid],
                             list(self.heads()),
                         )
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = len(self) - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = set()
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, False)
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = set(heads)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == self.nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendant of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors.add(n)  # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.update(
                                         [p for p in self.parents(n) if p != self.nullid]
                                     )
                                 elif n in heads:  # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > nullrev:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [root for root in roots if root in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(root) for root in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = nullrev
                             roots = [self.nullid]
                     # Transform our roots list into a set.
                     descendants = set(roots)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendants.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descendants, empty parents will look like
                     # they're descendants.
                     for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
                         n = self.node(r)
                         isdescendant = False
                         if lowestrev == nullrev:  # Everybody is a descendant of nullid
                             isdescendant = True
                         elif n in descendants:
                             # n is already a descendant
                             isdescendant = True
                             # This check only needs to be done here because all the roots
                             # were already marked as descendants before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendants, it's not a root.
                                 if (p[0] in descendants) or (p[1] in descendants):
                                     roots.remove(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendant if either of its parents are
                             # descendants.  (We seeded the descendants set with the
                             # roots up there, remember?)
                             if (p[0] in descendants) or (p[1] in descendants):
                                 descendants.add(n)
                                 isdescendant = True
                         if isdescendant and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendants and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = True
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = True
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     # Keep only the heads whose flag was set to True during the walk.
                     heads = [head for head, flag in heads.items() if flag]
                     roots = list(roots)
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def headrevs(self, revs=None):
                     """Return the head revisions of the revlog, or of a subset.

                     Without ``revs`` the index's own ``headrevs()`` is used, falling
                     back to ``self._headrevs()`` when that raises ``AttributeError``.
                     With ``revs`` the computation is restricted to those revisions,
                     using ``rustdagop`` when it is available and the index reports
                     ``rust_ext_compat``, and ``dagop.headrevs`` otherwise.
                     """
                     if revs is not None:
                         if rustdagop is not None and self.index.rust_ext_compat:
                             return rustdagop.headrevs(self.index, revs)
                         return dagop.headrevs(revs, self._uncheckedparentrevs)
                     try:
                         return self.index.headrevs()
                     except AttributeError:
                         # the index has no usable native implementation
                         return self._headrevs()
                 def computephases(self, roots):
                     """Forward ``roots`` to the index's ``computephasesmapsets``."""
                     index = self.index
                     return index.computephasesmapsets(roots)
                 def _headrevs(self):
                     count = len(self)
                     if not count:
                         return [nullrev]
                     # we won't iter over filtered rev so nobody is a head at start
                     ishead = [0] * (count + 1)
                     index = self.index
                     for r in self:
                         ishead[r] = 1  # I may be an head
                         e = index[r]
                         ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
                     return [r for r, val in enumerate(ishead) if val]
                 def heads(self, start=None, stop=None):
                     """Return the list of all nodes that have no children.

                     If ``start`` is given, only heads that are descendants of
                     ``start`` are returned.

                     If ``stop`` is given, every revision listed in it is treated as
                     if it had no children.
                     """
                     if start is None and stop is None:
                         # Unrestricted query: defer to headrevs().
                         if len(self) == 0:
                             return [self.nullid]
                         return [self.node(r) for r in self.headrevs()]
                     startrev = nullrev if start is None else self.rev(start)
                     stoprevs = {self.rev(n) for n in stop or []}
                     subset = dagop.headrevssubset(
                         self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
                     )
                     return [self.node(rev) for rev in subset]
                 def children(self, node):
                     """Return the nodes whose parents include ``node``.

                     Only revisions after ``node``'s own revision are scanned. When
                     ``node`` resolves to ``nullrev``, revisions with no non-null
                     parent count as its children.
                     """
                     target = self.rev(node)
                     found = []
                     for candidate in self.revs(start=target + 1):
                         realparents = [
                             pr for pr in self.parentrevs(candidate) if pr != nullrev
                         ]
                         if not realparents:
                             # parentless revision: a child of the null revision only
                             if target == nullrev:
                                 found.append(self.node(candidate))
                             continue
                         for pr in realparents:
                             if pr == target:
                                 found.append(self.node(candidate))
                     return found
                 def commonancestorsheads(self, a, b):
                     """Return the heads of the common ancestors of nodes a and b.

                     Both nodes are converted to revision numbers, the computation is
                     done by ``_commonancestorsheads()``, and the resulting revisions
                     are mapped back to nodes.
                     """
                     reva = self.rev(a)
                     revb = self.rev(b)
                     headrevs = self._commonancestorsheads(reva, revb)
                     return pycompat.maplist(self.node, headrevs)
                 def _commonancestorsheads(self, *revs):
                     """calculate all the heads of the common ancestors of revs"""
                     try:
                         ancs = self.index.commonancestorsheads(*revs)
                     except (AttributeError, OverflowError):  # C implementation failed
                         ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
                     return ancs
                 def isancestor(self, a, b):
                     """Tell whether node ``a`` is an ancestor of node ``b``.

                     A revision is considered an ancestor of itself. The nodes are
                     converted to revision numbers and handed to ``isancestorrev()``.
                     """
                     reva = self.rev(a)
                     revb = self.rev(b)
                     return self.isancestorrev(reva, revb)
                 def isancestorrev(self, a, b):
                     """Tell whether revision ``a`` is an ancestor of revision ``b``.

                     A revision is considered an ancestor of itself, and ``nullrev``
                     is an ancestor of everything. A revision numbered higher than
                     ``b`` is never reported as its ancestor. Every other case is
                     answered by ``reachableroots()``.
                     """
                     if a == nullrev or a == b:
                         return True
                     if a > b:
                         return False
                     reached = self.reachableroots(a, [b], [a], includepath=False)
                     return bool(reached)
                 def reachableroots(self, minroot, heads, roots, includepath=False):
                     """Return (heads(::(<roots> and <roots>::<heads>))).

                     If ``includepath`` is True, return (<roots>::<heads>) instead.

                     The index's ``reachableroots2`` is used when it works; an
                     ``AttributeError`` switches to ``dagop._reachablerootspure``.
                     """
                     try:
                         found = self.index.reachableroots2(
                             minroot, heads, roots, includepath
                         )
                     except AttributeError:
                         # note the different argument order of the pure version
                         found = dagop._reachablerootspure(
                             self.parentrevs, minroot, roots, heads, includepath
                         )
                     return found
                 def ancestor(self, a, b):
                     """Return the "best" common ancestor of nodes ``a`` and ``b``.

                     Candidates come from the index's ``ancestors`` and, if that raises
                     ``AttributeError`` or ``OverflowError``, from
                     ``ancestor.ancestors``. With several candidates the smallest node
                     wins, so the choice is stable. ``self.nullid`` is returned when
                     there is no candidate.
                     """
                     reva, revb = self.rev(a), self.rev(b)
                     try:
                         candidates = self.index.ancestors(reva, revb)
                     except (AttributeError, OverflowError):
                         candidates = ancestor.ancestors(self.parentrevs, reva, revb)
                     if not candidates:
                         return self.nullid
                     # choose a consistent winner when there's a tie
                     return min(map(self.node, candidates))
                 def _match(self, id):
                     if isinstance(id, int):
                         # rev
                         return self.node(id)
                     if len(id) == self.nodeconstants.nodelen:
                         # possibly a binary node
                         # odds of a binary node being all hex in ASCII are 1 in 10**25
                         try:
                             node = id
                             self.rev(node)  # quick search the index
                             return node
                         except error.LookupError:
                             pass  # may be partial hex id
                     try:
                         # str(rev)
                         rev = int(id)
                         if b"%d" % rev != id:
                             raise ValueError
                         if rev < 0:
                             rev = len(self) + rev
                         if rev < 0 or rev >= len(self):
                             raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         pass
                     if len(id) == 2 * self.nodeconstants.nodelen:
                         try:
                             # a full hex nodeid?
                             node = bin(id)
                             self.rev(node)
                             return node
                         except (binascii.Error, error.LookupError):
                             pass
                 def _partialmatch(self, id):
                     """Resolve a hex prefix ``id`` to a single node.

                     Returns the matching node, or None when nothing matches (also
                     when ``id`` is longer than 40 characters and the index lookup did
                     not settle the question).

                     Raises ``error.AmbiguousPrefixLookupError`` when the prefix matches
                     more than one candidate, and ``error.WdirUnsupported`` when the
                     prefix is only compatible with the working-directory id.

                     The index's ``partialmatch`` is tried first; the fallback scans
                     every index entry and caches unique hits in ``self._pcache``.
                     """
                     # we don't care wdirfilenodeids as they should be always full hash
                     maybewdir = self.nodeconstants.wdirhex.startswith(id)
                     ambiguous = False
                     try:
                         partial = self.index.partialmatch(id)
                         if partial and self.hasnode(partial):
                             if maybewdir:
                                 # single 'ff...' match in radix tree, ambiguous with wdir
                                 ambiguous = True
                             else:
                                 return partial
                         elif maybewdir:
                             # no 'ff...' match in radix tree, wdir identified
                             raise error.WdirUnsupported
                         else:
                             return None
                     except error.RevlogError:
                         # parsers.c radix tree lookup gave multiple matches
                         # fast path: for unfiltered changelog, radix tree is accurate
                         if not getattr(self, 'filteredrevs', None):
                             ambiguous = True
                         # fall through to slow path that filters hidden revisions
                     except (AttributeError, ValueError):
                         # we are pure python, or key is not hex
                         pass
                     if ambiguous:
                         raise error.AmbiguousPrefixLookupError(
                             id, self.display_id, _(b'ambiguous identifier')
                         )
                     # Slow path: consult the prefix cache before scanning the index.
                     if id in self._pcache:
                         return self._pcache[id]
                     if len(id) <= 40:
                         # hex(node)[:...]
                         l = len(id) // 2 * 2  # grab an even number of digits
                         try:
                             # we're dropping the last digit, so let's check that it's hex,
                             # to avoid the expensive computation below if it's not
                             if len(id) % 2 > 0:
                                 if not (id[-1] in hexdigits):
                                     return None
                             prefix = bin(id[:l])
                         except binascii.Error:
                             # not a hex string: no match (falls off the end -> None)
                             pass
                         else:
                             # First filter on the binary prefix (whole bytes only) ...
                             nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                             # ... then on the full hex prefix, keeping only nodes that
                             # hasnode() accepts.
                             nl = [
                                 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                             ]
                             # The null id is a valid candidate too.
                             if self.nodeconstants.nullhex.startswith(id):
                                 nl.append(self.nullid)
                             if len(nl) > 0:
                                 if len(nl) == 1 and not maybewdir:
                                     # unique match: remember it for next time
                                     self._pcache[id] = nl[0]
                                     return nl[0]
                                 raise error.AmbiguousPrefixLookupError(
                                     id, self.display_id, _(b'ambiguous identifier')
                                 )
                             if maybewdir:
                                 raise error.WdirUnsupported
                             return None
                 def lookup(self, id):
                     """Locate a node from an identifier.

                     ``id`` may be:

                     - a revision number or str(revision number)
                     - a nodeid or a prefix of a hex nodeid

                     Exact matching (``_match``) is tried before prefix matching
                     (``_partialmatch``). ``error.LookupError`` is raised when neither
                     yields a node.
                     """
                     found = self._match(id)
                     if found is None:
                         found = self._partialmatch(id)
                         if not found:
                             raise error.LookupError(
                                 id, self.display_id, _(b'no match found')
                             )
                     return found
                 def shortest(self, node, minlength=1):
                     """Find the shortest unambiguous prefix that matches node.

                     ``minlength`` is the smallest prefix length that may be returned.
                     Prefixes made only of ``f`` characters are lengthened so they
                     cannot be confused with the working-directory id, except when
                     ``node`` is that id itself.

                     Raises ``error.LookupError`` when ``node`` is not found.
                     """
                     def isvalid(prefix):
                         # True when ``prefix`` resolves unambiguously; raises
                         # LookupError when it matches nothing at all.
                         try:
                             matchednode = self._partialmatch(prefix)
                         except error.AmbiguousPrefixLookupError:
                             return False
                         except error.WdirUnsupported:
                             # single 'ff...' match
                             return True
                         if matchednode is None:
                             raise error.LookupError(node, self.display_id, _(b'no node'))
                         return True
                     def maybewdir(prefix):
                         # An all-'f' prefix could also designate the wdir id.
                         return all(c == b'f' for c in pycompat.iterbytestr(prefix))
                     hexnode = hex(node)
                     def disambiguate(hexnode, minlength):
                         """Disambiguate against wdirid."""
                         for length in range(minlength, len(hexnode) + 1):
                             prefix = hexnode[:length]
                             if not maybewdir(prefix):
                                 return prefix
                     # Fast path: ask the index directly, but only when no revisions
                     # are filtered.
                     if not getattr(self, 'filteredrevs', None):
                         try:
                             length = max(self.index.shortest(node), minlength)
                             return disambiguate(hexnode, length)
                         except error.RevlogError:
                             # The wdir id is handled by the code below; anything
                             # else is reported as a missing node.
                             if node != self.nodeconstants.wdirid:
                                 raise error.LookupError(
                                     node, self.display_id, _(b'no node')
                                 )
                         except AttributeError:
                             # Fall through to pure code
                             pass
                     if node == self.nodeconstants.wdirid:
                         # For the wdir id itself an all-'f' prefix is acceptable, so
                         # no disambiguation step is applied.
                         for length in range(minlength, len(hexnode) + 1):
                             prefix = hexnode[:length]
                             if isvalid(prefix):
                                 return prefix
                     # Slow path: grow the prefix until it resolves unambiguously.
                     for length in range(minlength, len(hexnode) + 1):
                         prefix = hexnode[:length]
                         if isvalid(prefix):
                             return disambiguate(hexnode, length)
                 def cmp(self, node, text):
                     """Compare ``text`` with the stored revision ``node``.

                     Returns True if ``text`` is different from what is stored. The
                     check hashes ``text`` together with the parents of ``node`` and
                     compares the result with ``node``.
                     """
                     parent1, parent2 = self.parents(node)
                     digest = storageutil.hashrevisionsha1(text, parent1, parent2)
                     return digest != node
                 def _getsegmentforrevs(self, startrev, endrev, df=None):
                     """Obtain a segment of raw data corresponding to a range of revisions.
                     Accepts the start and end revisions and an optional already-open
                     file handle to be used for reading. If the file handle is read, its
                     seek position will not be preserved.
                     Requests for data may be satisfied by a cache.
                     Returns a 2-tuple of (offset, data) for the requested range of
                     revisions. Offset is the integer offset from the beginning of the
                     revlog and data is a str or buffer of the raw byte data.
                     Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
                     to determine where each revision's data begins and ends.
                     """
                     # Inlined self.start(startrev) & self.end(endrev) for perf reasons
                     # (functions are expensive).
                     index = self.index
                     istart = index[startrev]
                     start = int(istart[0] >> 16)
                     if startrev == endrev:
                         end = start + istart[1]
                     else:
                         iend = index[endrev]
                         end = int(iend[0] >> 16) + iend[1]
                     if self._inline:
                         start += (startrev + 1) * self.index.entry_size
                         end += (endrev + 1) * self.index.entry_size
                     length = end - start
                     return start, self._segmentfile.read_chunk(start, length, df)
                 def _chunk(self, rev, df=None):
                     """Return the decompressed chunk stored for one revision.

                     ``rev`` is an integer revision. ``df`` is an optional already-open
                     file handle; if it is used, its seek position is not preserved.

                     The compression mode in field 10 of the index entry selects how
                     the raw data is decoded. ``error.RevlogError`` is raised for an
                     unknown mode.
                     """
                     mode = self.index[rev][10]
                     segment = self._getsegmentforrevs(rev, rev, df=df)
                     raw = segment[1]
                     if mode == COMP_MODE_PLAIN:
                         return raw
                     if mode == COMP_MODE_DEFAULT:
                         return self._decompressor(raw)
                     if mode == COMP_MODE_INLINE:
                         return self.decompress(raw)
                     raise error.RevlogError(b'unknown compression mode %d' % mode)
                 def _chunks(self, revs, df=None, targetsize=None):
                     """Obtain decompressed chunks for the specified revisions.

                     Accepts an iterable of numeric revisions that are assumed to be in
                     ascending order. Also accepts an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will
                     not be preserved. ``targetsize`` is forwarded to
                     ``deltautil.slicechunk`` when sparse reading is enabled.

                     This function is similar to calling ``self._chunk()`` multiple times,
                     but is faster.

                     Returns a list with decompressed data for each requested revision.
                     Raises ``error.RevlogError`` on an unknown compression mode.
                     """
                     if not revs:
                         return []
                     # Bind hot attributes to locals: the loops below run per revision.
                     start = self.start
                     length = self.length
                     inline = self._inline
                     iosize = self.index.entry_size
                     buffer = util.buffer
                     l = []
                     ladd = l.append
                     if not self._withsparseread:
                         slicedchunks = (revs,)
                     else:
                         slicedchunks = deltautil.slicechunk(
                             self, revs, targetsize=targetsize
                         )
                     for revschunk in slicedchunks:
                         firstrev = revschunk[0]
                         # Skip trailing revisions with empty diff
                         for lastrev in revschunk[::-1]:
                             if length(lastrev) != 0:
                                 break
                         try:
                             offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
                         except OverflowError:
                             # issue4215 - we can't cache a run of chunks greater than
                             # 2G on Windows
                             # Read this slice one revision at a time, but keep what
                             # earlier slices produced and carry on with later ones.
                             l.extend(self._chunk(rev, df=df) for rev in revschunk)
                             continue
                         decomp = self.decompress
                         # self._decompressor might be None, but will not be used in that case
                         def_decomp = self._decompressor
                         for rev in revschunk:
                             chunkstart = start(rev)
                             if inline:
                                 # account for the index entries interleaved with data
                                 chunkstart += (rev + 1) * iosize
                             chunklength = length(rev)
                             comp_mode = self.index[rev][10]
                             # View into the segment read above, relative to its offset.
                             c = buffer(data, chunkstart - offset, chunklength)
                             if comp_mode == COMP_MODE_PLAIN:
                                 ladd(c)
                             elif comp_mode == COMP_MODE_INLINE:
                                 ladd(decomp(c))
                             elif comp_mode == COMP_MODE_DEFAULT:
                                 ladd(def_decomp(c))
                             else:
                                 msg = b'unknown compression mode %d'
                                 msg %= comp_mode
                                 raise error.RevlogError(msg)
                     return l
                 def deltaparent(self, rev):
                     """Return the revision that ``rev`` is stored as a delta against.

                     ``nullrev`` is returned when the entry's base (field 3) is the
                     revision itself. Otherwise the base is returned when
                     ``_generaldelta`` is set, and ``rev - 1`` when it is not.
                     """
                     base = self.index[rev][3]
                     if base == rev:
                         return nullrev
                     return base if self._generaldelta else rev - 1
                 def issnapshot(self, rev):
                     """Tell whether ``rev`` is a snapshot.
                     Without sparse-revlog, a revision is a snapshot exactly when it has
                     no delta parent. With sparse-revlog, the index implementation is used
                     when it provides ``issnapshot``; otherwise the answer is computed here
                     from the delta base and the two parents of the revision.
                     """
                     if not self._sparserevlog:
                         return self.deltaparent(rev) == nullrev
                     if hasattr(self.index, 'issnapshot'):
                         # rebind the attribute on the instance so later calls go straight
                         # to the index method and skip the probing above
                         self.issnapshot = self.index.issnapshot
                         return self.issnapshot(rev)
                     if rev == nullrev:
                         return True
                     entry = self.index[rev]
                     base = entry[3]
                     if base == rev or base == nullrev:
                         return True

                     def skip_empty(parent):
                         # follow delta parents for as long as the revision is stored
                         # with an empty length
                         while self.length(parent) == 0:
                             ancestor = self.deltaparent(parent)
                             if ancestor == parent:
                                 break
                             parent = ancestor
                         return parent

                     p1 = skip_empty(entry[5])
                     p2 = skip_empty(entry[6])
                     if base == p1 or base == p2:
                         # a delta against a parent is a regular delta, not a snapshot
                         return False
                     return self.issnapshot(base)
                 def snapshotdepth(self, rev):
                     """Return the number of snapshots in the delta chain before ``rev``.
                     Raises ``error.ProgrammingError`` when ``rev`` is not a snapshot.
                     """
                     if not self.issnapshot(rev):
                         # interpolate the revision number so the error names the
                         # offending revision instead of a literal ``%d``
                         raise error.ProgrammingError(
                             b'revision %d not a snapshot' % rev
                         )
                     # the chain includes ``rev`` itself, hence the ``- 1``
                     return len(self._deltachain(rev)[0]) - 1
                 def revdiff(self, rev1, rev2):
                     """Return or compute a binary delta going from ``rev1`` to ``rev2``.
                     The result is meant to be written to revlog data directly, so raw
                     revision data is used. When ``rev2`` is already stored as a delta
                     against ``rev1``, the stored chunk is returned as-is.
                     """
                     stored_against_rev1 = (
                         rev1 != nullrev and self.deltaparent(rev2) == rev1
                     )
                     if stored_against_rev1:
                         return bytes(self._chunk(rev2))
                     old_raw = self.rawdata(rev1)
                     new_raw = self.rawdata(rev2)
                     return mdiff.textdiff(old_raw, new_raw)
                 def revision(self, nodeorrev):
                     """Return the uncompressed revision for a node or revision number.
                     This delegates to ``_revisiondata`` in its default (non-raw) mode.
                     """
                     text = self._revisiondata(nodeorrev)
                     return text
                 def sidedata(self, nodeorrev):
                     """Return the extra data attached to a revision but not part of its hash.
                     A dictionary is returned for now; a more advanced (lazy) mapping may
                     be used in the future.
                     """
                     # integers are already revision numbers, anything else is a node
                     rev = nodeorrev if isinstance(nodeorrev, int) else self.rev(nodeorrev)
                     return self._sidedata(rev)
                 def _revisiondata(self, nodeorrev, _df=None, raw=False):
                     """Return the text of a revision given as a node or revision number.
                     With ``raw=True`` the rawtext is returned, after
                     ``flagutil.processflagsraw`` decided whether its hash must be
                     checked. Otherwise the rawtext goes through
                     ``flagutil.processflagsread`` first. ``_df`` is forwarded untouched
                     to ``_rawtext``.
                     The null node short-circuits to ``b""``. When the rawtext was not
                     served from ``_revisioncache``, the cache is set to
                     ``(node, rev, rawtext)`` once the (optional) hash check passed.
                     """
                     # deal with <nodeorrev> argument type
                     if isinstance(nodeorrev, int):
                         rev = nodeorrev
                         node = self.node(rev)
                     else:
                         node = nodeorrev
                         # resolved lazily: ``_rawtext`` or the fallback below fills it
                         rev = None
                     # fast path the special `nullid` rev
                     if node == self.nullid:
                         return b""
                     # ``rawtext`` is the text as stored inside the revlog. Might be the
                     # revision or might need to be processed to retrieve the revision.
                     rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
                     if raw and validated:
                         # if we don't want to process the raw text and that raw
                         # text is cached, we can exit early.
                         return rawtext
                     if rev is None:
                         # a cache hit in ``_rawtext`` hands back the rev it was given
                         rev = self.rev(node)
                     # the revlog's flag for this revision
                     # (usually alter its state or content)
                     flags = self.flags(rev)
                     if validated and flags == REVIDX_DEFAULT_FLAGS:
                         # no extra flags set, no flag processor runs, text = rawtext
                         return rawtext
                     if raw:
                         validatehash = flagutil.processflagsraw(self, rawtext, flags)
                         text = rawtext
                     else:
                         r = flagutil.processflagsread(self, rawtext, flags)
                         text, validatehash = r
                     if validatehash:
                         self.checkhash(text, node, rev=rev)
                     if not validated:
                         # ``_rawtext`` dropped the cache; repopulate it with the rawtext
                         self._revisioncache = (node, rev, rawtext)
                     return text
                 def _rawtext(self, node, rev, _df=None):
                     """return the possibly unvalidated rawtext for a revision
                     returns (rev, rawtext, validated)
                     ``validated`` is True only when the rawtext comes straight from
                     ``_revisioncache``; in that case ``rev`` is returned as received and
                     may still be None. Otherwise the text is rebuilt from the delta
                     chain, ``_revisioncache`` is reset to None and ``validated`` is
                     False. ``_df`` is passed on to ``_chunks``.
                     """
                     # revision in the cache (could be useful to apply delta)
                     cachedrev = None
                     # An intermediate text to apply deltas to
                     basetext = None
                     # Check if we have the entry in cache
                     # The cache entry looks like (node, rev, rawtext)
                     if self._revisioncache:
                         if self._revisioncache[0] == node:
                             return (rev, self._revisioncache[2], True)
                         cachedrev = self._revisioncache[1]
                     if rev is None:
                         rev = self.rev(node)
                     # stop walking the chain at the cached revision, if it is reached
                     chain, stopped = self._deltachain(rev, stoprev=cachedrev)
                     if stopped:
                         # the chain ends on the cached revision: reuse its text as base
                         basetext = self._revisioncache[2]
                     # drop cache to save memory, the caller is expected to
                     # update self._revisioncache after validating the text
                     self._revisioncache = None
                     targetsize = None
                     rawsize = self.index[rev][2]
                     # a negative rawsize leaves ``targetsize`` unset
                     if 0 <= rawsize:
                         targetsize = 4 * rawsize
                     bins = self._chunks(chain, df=_df, targetsize=targetsize)
                     if basetext is None:
                         # no cached base: the first chunk is the base text itself
                         basetext = bytes(bins[0])
                         bins = bins[1:]
                     rawtext = mdiff.patches(basetext, bins)
                     del basetext  # let us have a chance to free memory early
                     return (rev, rawtext, False)
                 def _sidedata(self, rev):
                     """Return the sidedata for a given revision number."""
                     index_entry = self.index[rev]
                     sidedata_offset = index_entry[8]
                     sidedata_size = index_entry[9]
                     if self._inline:
                         sidedata_offset += self.index.entry_size * (1 + rev)
                     if sidedata_size == 0:
                         return {}
                     if self._docket.sidedata_end < sidedata_offset + sidedata_size:
                         filename = self._sidedatafile
                         end = self._docket.sidedata_end
                         offset = sidedata_offset
                         length = sidedata_size
                         m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
                         raise error.RevlogError(m)
                     comp_segment = self._segmentfile_sidedata.read_chunk(
                         sidedata_offset, sidedata_size
                     )
                     comp = self.index[rev][11]
                     if comp == COMP_MODE_PLAIN:
                         segment = comp_segment
                     elif comp == COMP_MODE_DEFAULT:
                         segment = self._decompressor(comp_segment)
                     elif comp == COMP_MODE_INLINE:
                         segment = self.decompress(comp_segment)
                     else:
                         msg = b'unknown compression mode %d'
                         msg %= comp
                         raise error.RevlogError(msg)
                     sidedata = sidedatautil.deserialize_sidedata(segment)
                     return sidedata
                 def rawdata(self, nodeorrev):
                     """Return the uncompressed raw data for a node or revision number.
                     This delegates to ``_revisiondata`` with ``raw=True``.
                     """
                     raw = self._revisiondata(nodeorrev, raw=True)
                     return raw
                 def hash(self, text, p1, p2):
                     """Compute the node hash of ``text`` with parents ``p1`` and ``p2``.
                     Kept as a method so that subclasses can substitute another hash.
                     """
                     node = storageutil.hashrevisionsha1(text, p1, p2)
                     return node
                 def checkhash(self, text, node, p1=None, p2=None, rev=None):
                     """Check that ``node`` is the hash of ``text`` and its parents.
                     The parents are looked up from ``node`` when neither ``p1`` nor
                     ``p2`` is given. ``error.RevlogError`` is raised on mismatch, or
                     ``error.CensoredNodeError`` when the revlog is censorable and the
                     text is a censored one. Kept as a method so that subclasses can
                     extend the mismatch behavior.
                     """
                     try:
                         if p1 is None and p2 is None:
                             p1, p2 = self.parents(node)
                         if node == self.hash(text, p1, p2):
                             return
                         # Forget the cached revision on hash failure. The cache only
                         # holds the raw revision, so the next raw access will miss it,
                         # but this should be rare and teaching the cache about the
                         # verification state would be extra work.
                         if self._revisioncache and self._revisioncache[0] == node:
                             self._revisioncache = None
                         # prefer the revision number in the message when it is known
                         if rev is None:
                             revornode = templatefilters.short(hex(node))
                         else:
                             revornode = rev
                         raise error.RevlogError(
                             _(b"integrity check failed on %s:%s")
                             % (self.display_id, pycompat.bytestr(revornode))
                         )
                     except error.RevlogError:
                         if self._censorable and storageutil.iscensoredtext(text):
                             raise error.CensoredNodeError(self.display_id, node, text)
                         raise
                 @property
                 def _split_index_file(self):
                     """the path where to expect the index of an ongoing splitting operation
                     The file will only exist if a splitting operation is in progress, but
                     it is always expected at the same location."""
                     parts = self.radix.split(b'/')
                     if len(parts) > 1:
                         # adds a '-s' prefix to the ``data/` or `meta/` base
                         head = parts[0] + b'-s'
                         mids = parts[1:-1]
                         tail = parts[-1] + b'.i'
                         pieces = [head] + mids + [tail]
                         return b'/'.join(pieces)
                     else:
                         # the revlog is stored at the root of the store (changelog or
                         # manifest), no risk of collision.
                         return self.radix + b'.i.s'
                 def _enforceinlinesize(self, tr, side_write=True):
                     """Check if the revlog is too big for inline and convert if so.
                     This should be called after revisions are added to the revlog. If the
                     revlog has grown too large to be an inline revlog, it will convert it
                     to use multiple index and data files.
                     ``tr`` is the running transaction; the index file must already be
                     registered in it, otherwise ``error.RevlogError`` is raised.
                     When ``side_write`` is true, the new index is written at
                     ``_split_index_file`` and only renamed over the real index by a
                     callback registered with ``tr.addfinalize``; a callback registered
                     with ``tr.addabort`` points ``_indexfile`` back at the original path.
                     """
                     tiprev = len(self) - 1
                     total_size = self.start(tiprev) + self.length(tiprev)
                     if not self._inline or total_size < _maxinline:
                         return
                     troffset = tr.findoffset(self._indexfile)
                     if troffset is None:
                         raise error.RevlogError(
                             _(b"%s not found in the transaction") % self._indexfile
                         )
                     if troffset:
                         tr.addbackup(self._indexfile, for_offset=True)
                     tr.add(self._datafile, 0)
                     existing_handles = False
                     if self._writinghandles is not None:
                         existing_handles = True
                         fp = self._writinghandles[0]
                         fp.flush()
                         fp.close()
                         # We can't use the cached file handle after close(). So prevent
                         # its usage.
                         self._writinghandles = None
                         self._segmentfile.writing_handle = None
                         # No need to deal with sidedata writing handle as it is only
                         # relevant with revlog-v2 which is never inline, not reaching
                         # this code
                     if side_write:
                         old_index_file_path = self._indexfile
                         new_index_file_path = self._split_index_file
                         opener = self.opener
                         # only keep a weak reference so the callbacks do not keep the
                         # revlog alive
                         weak_self = weakref.ref(self)
                         # the "split" index replace the real index when the transaction is finalized
                         def finalize_callback(tr):
                             opener.rename(
                                 new_index_file_path,
                                 old_index_file_path,
                                 checkambig=True,
                             )
                             maybe_self = weak_self()
                             if maybe_self is not None:
                                 maybe_self._indexfile = old_index_file_path
                         def abort_callback(tr):
                             maybe_self = weak_self()
                             if maybe_self is not None:
                                 maybe_self._indexfile = old_index_file_path
                         tr.registertmp(new_index_file_path)
                         if self.target[1] is not None:
                             callback_id = b'000-revlog-split-%d-%s' % self.target
                         else:
                             callback_id = b'000-revlog-split-%d' % self.target[0]
                         tr.addfinalize(callback_id, finalize_callback)
                         tr.addabort(callback_id, abort_callback)
                     new_dfh = self._datafp(b'w+')
                     new_dfh.truncate(0)  # drop any potentially existing data
                     try:
                         # copy every revision's data segment out of the inline file into
                         # the new data file
                         with self.reading():
                             for r in self:
                                 new_dfh.write(self._getsegmentforrevs(r, r)[1])
                             new_dfh.flush()
                         if side_write:
                             self._indexfile = new_index_file_path
                         with self.__index_new_fp() as fp:
                             self._format_flags &= ~FLAG_INLINE_DATA
                             self._inline = False
                             for i in self:
                                 e = self.index.entry_binary(i)
                                 if i == 0 and self._docket is None:
                                     # without a docket, the header is stored in front of
                                     # the first index entry
                                     header = self._format_flags | self._format_version
                                     header = self.index.pack_header(header)
                                     e = header + e
                                 fp.write(e)
                             if self._docket is not None:
                                 self._docket.index_end = fp.tell()
                             # If we don't use side-write, the temp file replace the real
                             # index when we exit the context manager
                         nodemaputil.setup_persistent_nodemap(tr, self)
                         self._segmentfile = randomaccessfile.randomaccessfile(
                             self.opener,
                             self._datafile,
                             self._chunkcachesize,
                         )
                         if existing_handles:
                             # switched from inline to conventional reopen the index
                             ifh = self.__index_write_fp()
                             self._writinghandles = (ifh, new_dfh, None)
                             self._segmentfile.writing_handle = new_dfh
                             # ownership moved to ``_writinghandles``: keep the
                             # ``finally`` clause from closing it
                             new_dfh = None
                             # No need to deal with sidedata writing handle as it is only
                             # relevant with revlog-v2 which is never inline, not reaching
                             # this code
                     finally:
                         if new_dfh is not None:
                             new_dfh.close()
                 def _nodeduplicatecallback(self, transaction, node):
                     """called when trying to add a node already stored.
                     This implementation does nothing and returns None.
                     NOTE(review): presumably a hook for subclasses to override - confirm
                     against the callers.
                     """
                 @contextlib.contextmanager
                 def reading(self):
                     """Context manager keeping the data and sidedata files open for reading.
                     Nothing is opened when the index is empty.
                     """
                     if len(self.index) == 0:
                         yield  # nothing to be read
                         return
                     data_file = self._segmentfile
                     with data_file.reading(), self._segmentfile_sidedata.reading():
                         yield
                 @contextlib.contextmanager
                 def _writing(self, transaction):
                     """Context manager opening the revlog files for writing.
                     When writing handles are already set, the context is re-entered
                     without opening anything. Otherwise the data file (non-inline
                     revlogs only), the sidedata file (when there is one) and the index
                     file are opened, registered in ``transaction`` with their current
                     size, and exposed through ``_writinghandles``. On a clean exit the
                     docket, if any, is written. All handles are closed on exit.
                     Raises ``error.ProgrammingError`` for a ``trypending`` revlog.
                     """
                     if self._trypending:
                         msg = b'try to write in a `trypending` revlog: %s'
                         msg %= self.display_id
                         raise error.ProgrammingError(msg)
                     if self._writinghandles is not None:
                         yield
                     else:
                         ifh = dfh = sdfh = None
                         try:
                             r = len(self)
                             # opening the data file.
                             dsize = 0
                             if r:
                                 dsize = self.end(r - 1)
                             if not self._inline:
                                 try:
                                     dfh = self._datafp(b"r+")
                                     if self._docket is None:
                                         dfh.seek(0, os.SEEK_END)
                                     else:
                                         dfh.seek(self._docket.data_end, os.SEEK_SET)
                                 except FileNotFoundError:
                                     dfh = self._datafp(b"w+")
                                 transaction.add(self._datafile, dsize)
                             if self._sidedatafile is not None:
                                 # revlog-v2 does not inline, help Pytype
                                 assert dfh is not None
                                 try:
                                     sdfh = self.opener(self._sidedatafile, mode=b"r+")
                                     # position the sidedata handle (not the data
                                     # handle, which must stay at the end of the data)
                                     sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                                 except FileNotFoundError:
                                     sdfh = self.opener(self._sidedatafile, mode=b"w+")
                                 transaction.add(
                                     self._sidedatafile, self._docket.sidedata_end
                                 )
                             # opening the index file.
                             isize = r * self.index.entry_size
                             ifh = self.__index_write_fp()
                             if self._inline:
                                 # an inline index file also holds the revision data
                                 transaction.add(self._indexfile, dsize + isize)
                             else:
                                 transaction.add(self._indexfile, isize)
                             # exposing all file handle for writing.
                             self._writinghandles = (ifh, dfh, sdfh)
                             self._segmentfile.writing_handle = ifh if self._inline else dfh
                             self._segmentfile_sidedata.writing_handle = sdfh
                             yield
                             if self._docket is not None:
                                 self._write_docket(transaction)
                         finally:
                             self._writinghandles = None
                             self._segmentfile.writing_handle = None
                             self._segmentfile_sidedata.writing_handle = None
                             if dfh is not None:
                                 dfh.close()
                             if sdfh is not None:
                                 sdfh.close()
                             # closing the index file last to avoid exposing referent to
                             # potential unflushed data content.
                             if ifh is not None:
                                 ifh.close()
                 def _write_docket(self, transaction):
                     """write the current docket on disk
                     Exist as a method to help changelog to implement transaction logic
                     We could also imagine using the same transaction logic for all revlog
                     since docket are cheap."""
                     self._docket.write(transaction)
                 def addrevision(
                     self,
                     text,
                     transaction,
                     link,
                     p1,
                     p2,
                     cachedelta=None,
                     node=None,
                     flags=REVIDX_DEFAULT_FLAGS,
                     deltacomputer=None,
                     sidedata=None,
                 ):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     cachedelta - an optional precomputed delta
                     node - nodeid of revision; typically node is not specified, and it is
                         computed by default as hash(text, p1, p2), however subclasses might
                         use different hashing method (and override checkhash() in such case)
                     flags - the known flags to set on the revision
                     deltacomputer - an optional deltacomputer instance shared between
                         multiple calls
                     sidedata - an optional mapping of sidedata; non-empty sidedata is
                         refused when the revlog does not support it
                     Returns the revision number, which is the existing one when the node
                     is already stored.
                     """
                     if link == nullrev:
                         raise error.RevlogError(
                             _(b"attempted to add linkrev -1 to %s") % self.display_id
                         )
                     if sidedata is None:
                         sidedata = {}
                     elif sidedata and not self.hassidedata:
                         raise error.ProgrammingError(
                             _(b"trying to add sidedata to a revlog who don't support them")
                         )
                     if flags:
                         # with flags set, the node is derived from the unprocessed text
                         node = node or self.hash(text, p1, p2)
                     rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
                     if rawtext != text:
                         # the flag processor changed the revision data, so a provided
                         # cachedelta no longer applies
                         cachedelta = None
                     rawsize = len(rawtext)
                     if rawsize > _maxentrysize:
                         raise error.RevlogError(
                             _(
                                 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                             )
                             % (self.display_id, rawsize)
                         )
                     node = node or self.hash(rawtext, p1, p2)
                     known_rev = self.index.get_rev(node)
                     if known_rev is not None:
                         return known_rev
                     if validatehash:
                         self.checkhash(rawtext, node, p1=p1, p2=p2)
                     return self.addrawrevision(
                         rawtext,
                         transaction,
                         link,
                         p1,
                         p2,
                         node,
                         flags,
                         cachedelta=cachedelta,
                         deltacomputer=deltacomputer,
                         sidedata=sidedata,
                     )
                 def addrawrevision(
                     self,
                     rawtext,
                     transaction,
                     link,
                     p1,
                     p2,
                     node,
                     flags,
                     cachedelta=None,
                     deltacomputer=None,
                     sidedata=None,
                 ):
                     """Add a raw revision whose flags, node and parents are already known.
                     Useful when reusing a revision not stored in this revlog (ex:
                     received over wire, or read from an external bundle). The addition
                     runs inside the ``_writing`` context of ``transaction``.
                     """
                     with self._writing(transaction):
                         rev = self._addrevision(
                             node,
                             rawtext,
                             transaction,
                             link,
                             p1,
                             p2,
                             flags,
                             cachedelta,
                             deltacomputer=deltacomputer,
                             sidedata=sidedata,
                         )
                     return rev
                 def compress(self, data):
                     """Generate a possibly-compressed representation of data.
                     Returns a ``(header, payload)`` pair. The header is ``b'u'`` only
                     for data stored uncompressed that does not start with a NUL byte.
                     """
                     if data:
                         packed = self._compressor.compress(data)
                         if packed:
                             # The revlog compressor added the header in the returned data.
                             return b'', packed
                         if data[:1] != b'\0':
                             return b'u', data
                     # empty data, or uncompressed data starting with a NUL byte
                     return b'', data
                 def decompress(self, data):
                     """Decompress a revlog chunk.
                     The chunk is expected to begin with a header identifying the
                     format type so it can be routed to an appropriate decompressor.
                     Empty chunks are returned unchanged.
                     """
                     if not data:
                         return data
                     # Revlogs are read much more frequently than they are written and many
                     # chunks only take microseconds to decompress, so performance is
                     # important here.
                     #
                     # Assumptions made about revlogs:
                     #
                     # 1) most chunks are compressed (as opposed to inline raw data);
                     # 2) decompressing *any* data is likely at least 10x slower than
                     #    returning raw inline data;
                     # 3) common and officially supported compression engines come first.
                     #
                     # Hence the inline checks below are tried before the compengines
                     # lookup, favoring chunks encoded with the common engines over raw
                     # data and over less common or non-official engines.
                     #
                     # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
                     # compressed chunks. And this matters for changelog and manifest reads.
                     header = data[:1]
                     if header == b'x':
                         try:
                             return _zlibdecompress(data)
                         except zlib.error as e:
                             raise error.RevlogError(
                                 _(b'revlog decompress error: %s')
                                 % stringutil.forcebytestr(e)
                             )
                     # '\0' is more common than 'u' so it goes first.
                     if header == b'\0':
                         return data
                     if header == b'u':
                         # skip the one-byte marker
                         return util.buffer(data, 1)
                     engine = self._get_decompressor(header)
                     return engine.decompress(data)
                 def _addrevision(
                     self,
                     node,
                     rawtext,
                     transaction,
                     link,
                     p1,
                     p2,
                     flags,
                     cachedelta,
                     alwayscache=False,
                     deltacomputer=None,
                     sidedata=None,
                 ):
                     """internal function to add revisions to the log

                     see addrevision for argument descriptions.

                     note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

                     if "deltacomputer" is not provided or None, a defaultdeltacomputer will
                     be used.

                     invariants:
                     - rawtext is optional (can be None); if not set, cachedelta must be set.
                       if both are set, they must correspond to each other.

                     Raises RevlogError when ``node`` is the null node or one of the
                     working-directory nodes, and ProgrammingError when no writing
                     handles are open (``self._writinghandles`` is None).

                     Returns the revision number given to the new entry, i.e. the length
                     of the revlog before the call.
                     """
                     # Refuse nodes that must never be stored.
                     if node == self.nullid:
                         raise error.RevlogError(
                             _(b"%s: attempt to add null revision") % self.display_id
                         )
                     if (
                         node == self.nodeconstants.wdirid
                         or node in self.nodeconstants.wdirfilenodeids
                     ):
                         raise error.RevlogError(
                             _(b"%s: attempt to add wdir revision") % self.display_id
                         )
                     if self._writinghandles is None:
                         msg = b'adding revision outside `revlog._writing` context'
                         raise error.ProgrammingError(msg)
                     # The text travels in a one-element list; it is read back from
                     # btext[0] after the delta search (presumably so the delta computer
                     # can fill it in when only a cached delta was supplied -- confirm
                     # against deltautil).
                     btext = [rawtext]
                     # `curr` is the revision number the new entry will get.
                     curr = len(self)
                     prev = curr - 1
                     offset = self._get_data_offset(prev)
                     # Optional hook called with (handle, file name, expected position);
                     # presumably it detects a file that changed behind our back.
                     if self._concurrencychecker:
                         ifh, dfh, sdfh = self._writinghandles
                         # XXX no checking for the sidedata file
                         if self._inline:
                             # offset is "as if" it were in the .d file, so we need to add on
                             # the size of the entry metadata.
                             self._concurrencychecker(
                                 ifh, self._indexfile, offset + curr * self.index.entry_size
                             )
                         else:
                             # Entries in the .i are a consistent size.
                             self._concurrencychecker(
                                 ifh, self._indexfile, curr * self.index.entry_size
                             )
                             self._concurrencychecker(dfh, self._datafile, offset)
                     p1r, p2r = self.rev(p1), self.rev(p2)
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if rawtext is None:
                         # need rawtext size, before changed by flag processors, which is
                         # the non-raw size. use revlog explicitly to avoid filelog's extra
                         # logic that might remove metadata size.
                         textlen = mdiff.patchedsize(
                             revlog.size(self, cachedelta[0]), cachedelta[1]
                         )
                     else:
                         textlen = len(rawtext)
                     # Build a default delta computer when the caller did not share one.
                     if deltacomputer is None:
                         write_debug = None
                         if self._debug_delta:
                             write_debug = transaction._report
                         deltacomputer = deltautil.deltacomputer(
                             self, write_debug=write_debug
                         )
                     if cachedelta is not None and len(cachedelta) == 2:
                         # If the cached delta has no information about how it should be
                         # reused, add the default reuse instruction according to the
                         # revlog's configuration.
                         if self._generaldelta and self._lazydeltabase:
                             delta_base_reuse = DELTA_BASE_REUSE_TRY
                         else:
                             delta_base_reuse = DELTA_BASE_REUSE_NO
                         cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
                     revinfo = revlogutils.revisioninfo(
                         node,
                         p1,
                         p2,
                         btext,
                         textlen,
                         cachedelta,
                         flags,
                     )
                     deltainfo = deltacomputer.finddeltainfo(revinfo)
                     # Without a docket the compression mode stays "inline"; with one,
                     # delta_compression() picks the mode and may return an adjusted
                     # deltainfo.
                     compression_mode = COMP_MODE_INLINE
                     if self._docket is not None:
                         default_comp = self._docket.default_compression_header
                         r = deltautil.delta_compression(default_comp, deltainfo)
                         compression_mode, deltainfo = r
                     sidedata_compression_mode = COMP_MODE_INLINE
                     if sidedata and self.hassidedata:
                         # Start from the uncompressed form and only switch to the
                         # compressed bytes when compression actually saved space.
                         sidedata_compression_mode = COMP_MODE_PLAIN
                         serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
                         sidedata_offset = self._docket.sidedata_end
                         h, comp_sidedata = self.compress(serialized_sidedata)
                         if (
                             h != b'u'
                             and comp_sidedata[0:1] != b'\0'
                             and len(comp_sidedata) < len(serialized_sidedata)
                         ):
                             assert not h
                             # The first byte of the compressed payload tells whether it
                             # was produced by the docket's default compression.
                             if (
                                 comp_sidedata[0:1]
                                 == self._docket.default_compression_header
                             ):
                                 sidedata_compression_mode = COMP_MODE_DEFAULT
                                 serialized_sidedata = comp_sidedata
                             else:
                                 sidedata_compression_mode = COMP_MODE_INLINE
                                 serialized_sidedata = comp_sidedata
                     else:
                         serialized_sidedata = b""
                         # Don't store the offset if the sidedata is empty, that way
                         # we can easily detect empty sidedata and they will be no different
                         # than ones we manually add.
                         sidedata_offset = 0
                     # Rank is only computed when the revlog is configured for it:
                     # 1 for a root, 1 + the parent's rank for a single-parent revision.
                     rank = RANK_UNKNOWN
                     if self._compute_rank:
                         if (p1r, p2r) == (nullrev, nullrev):
                             rank = 1
                         elif p1r != nullrev and p2r == nullrev:
                             rank = 1 + self.fast_rank(p1r)
                         elif p1r == nullrev and p2r != nullrev:
                             rank = 1 + self.fast_rank(p2r)
                         else:  # merge node
                             if rustdagop is not None and self.index.rust_ext_compat:
                                 rank = rustdagop.rank(self.index, p1r, p2r)
                             else:
                                 # Start from the higher-numbered parent and add the
                                 # number of revisions findmissingrevs([pmax], [pmin])
                                 # yields.
                                 pmin, pmax = sorted((p1r, p2r))
                                 rank = 1 + self.fast_rank(pmax)
                                 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
                     e = revlogutils.entry(
                         flags=flags,
                         data_offset=offset,
                         data_compressed_length=deltainfo.deltalen,
                         data_uncompressed_length=textlen,
                         data_compression_mode=compression_mode,
                         data_delta_base=deltainfo.base,
                         link_rev=link,
                         parent_rev_1=p1r,
                         parent_rev_2=p2r,
                         node_id=node,
                         sidedata_offset=sidedata_offset,
                         sidedata_compressed_length=len(serialized_sidedata),
                         sidedata_compression_mode=sidedata_compression_mode,
                         rank=rank,
                     )
                     # The in-memory index is updated first, then its binary form is
                     # fetched back and written to disk.
                     self.index.append(e)
                     entry = self.index.entry_binary(curr)
                     # The very first entry of a docket-less revlog is prefixed with the
                     # packed format header.
                     if curr == 0 and self._docket is None:
                         header = self._format_flags | self._format_version
                         header = self.index.pack_header(header)
                         entry = header + entry
                     self._writeentry(
                         transaction,
                         entry,
                         deltainfo.data,
                         link,
                         offset,
                         serialized_sidedata,
                         sidedata_offset,
                     )
                     rawtext = btext[0]
                     # With `alwayscache`, rebuild the full text if it is still unknown
                     # so it can be cached below.
                     if alwayscache and rawtext is None:
                         rawtext = deltacomputer.buildtext(revinfo)
                     if type(rawtext) == bytes:  # only accept immutable objects
                         self._revisioncache = (node, curr, rawtext)
                     self._chainbasecache[curr] = deltainfo.chainbase
                     return curr
                 def _get_data_offset(self, prev):
                     """Returns the current offset in the (in-transaction) data file.
                     Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
                     file to store that information: since sidedata can be rewritten to the
                     end of the data file within a transaction, you can have cases where, for
                     example, rev `n` does not have sidedata while rev `n - 1` does, leading
                     to `n - 1`'s sidedata being written after `n`'s data.
                     TODO cache this in a docket file before getting out of experimental."""
                     if self._docket is None:
                         return self.end(prev)
                     else:
                         return self._docket.data_end
                 def _writeentry(
                     self, transaction, entry, data, link, offset, sidedata, sidedata_offset
                 ):
                     """Write one index entry plus its data (and sidedata) to disk.

                     ``entry`` is the binary index record, ``data`` a two-item sequence
                     whose items are written one after the other, and ``sidedata`` the
                     serialized sidedata bytes (may be empty). ``offset`` and
                     ``sidedata_offset`` are the positions registered with the
                     transaction before anything is written. ``link`` is not used here.

                     Raises ProgrammingError when no writing handles are open.
                     """
                     # Files opened in a+ mode have inconsistent behavior on various
                     # platforms. Windows requires that a file positioning call be made
                     # when the file handle transitions between reads and writes. See
                     # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
                     # platforms, Python or the platform itself can be buggy. Some versions
                     # of Solaris have been observed to not append at the end of the file
                     # if the file was seeked to before the end. See issue4943 for more.
                     #
                     # We work around this issue by inserting a seek() before writing.
                     # Note: This is likely not necessary on Python 3. However, because
                     # the file handle is reused for reads and may be seeked there, we need
                     # to be careful before changing this.
                     if self._writinghandles is None:
                         msg = b'adding revision outside `revlog._writing` context'
                         raise error.ProgrammingError(msg)
                     ifh, dfh, sdfh = self._writinghandles
                     # Position every handle at its logical end: the physical end of the
                     # file without a docket, the end recorded in the docket otherwise.
                     if self._docket is None:
                         ifh.seek(0, os.SEEK_END)
                     else:
                         ifh.seek(self._docket.index_end, os.SEEK_SET)
                     if dfh:
                         if self._docket is None:
                             dfh.seek(0, os.SEEK_END)
                         else:
                             dfh.seek(self._docket.data_end, os.SEEK_SET)
                     if sdfh:
                         sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                     # The entry has already been appended to the in-memory index, so the
                     # revision being written is the last one.
                     curr = len(self) - 1
                     if not self._inline:
                         # Register the pre-write size of each file with the transaction
                         # before touching it.
                         transaction.add(self._datafile, offset)
                         if self._sidedatafile:
                             transaction.add(self._sidedatafile, sidedata_offset)
                         transaction.add(self._indexfile, curr * len(entry))
                         if data[0]:
                             dfh.write(data[0])
                         dfh.write(data[1])
                         if sidedata:
                             sdfh.write(sidedata)
                         ifh.write(entry)
                     else:
                         # Inline revlog: data lives in the index file, right after its
                         # entry, so the file offset also counts the preceding entries.
                         offset += curr * self.index.entry_size
                         transaction.add(self._indexfile, offset)
                         ifh.write(entry)
                         ifh.write(data[0])
                         ifh.write(data[1])
                         assert not sidedata
                         self._enforceinlinesize(transaction)
                     if self._docket is not None:
                         # revlog-v2 always has 3 writing handles, help Pytype
                         wh1 = self._writinghandles[0]
                         wh2 = self._writinghandles[1]
                         wh3 = self._writinghandles[2]
                         assert wh1 is not None
                         assert wh2 is not None
                         assert wh3 is not None
                         # Record the new logical ends from the handles' positions.
                         self._docket.index_end = wh1.tell()
                         self._docket.data_end = wh2.tell()
                         self._docket.sidedata_end = wh3.tell()
                     nodemaputil.setup_persistent_nodemap(transaction, self)
                 def addgroup(
                     self,
                     deltas,
                     linkmapper,
                     transaction,
                     alwayscache=False,
                     addrevisioncb=None,
                     duplicaterevisioncb=None,
                     debug_info=None,
                     delta_base_reuse_policy=None,
                 ):
                     """
                     add a delta group

                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.

                     Each item of ``deltas`` is an 8-tuple
                     ``(node, p1, p2, linknode, deltabase, delta, flags, sidedata)``;
                     ``linkmapper`` is called with ``linknode`` to obtain the link
                     revision.

                     If ``addrevisioncb`` is defined, it will be called with arguments of
                     this revlog and the revision that was added.

                     If ``duplicaterevisioncb`` is defined, it will be called with this
                     revlog and the existing revision for every node that is already
                     stored.

                     When ``delta_base_reuse_policy`` is None it is derived from the
                     revlog configuration.

                     Raises ProgrammingError on nested calls, LookupError when a parent
                     or the delta base is unknown, and CensoredBaseError when a delta
                     against a censored base is not a full replacement.

                     Returns True if at least one item was processed (added or found to
                     be a duplicate), False otherwise.
                     """
                     if self._adding_group:
                         raise error.ProgrammingError(b'cannot nest addgroup() calls')
                     # read the default delta-base reuse policy from revlog config if the
                     # group did not specify one.
                     if delta_base_reuse_policy is None:
                         if self._generaldelta and self._lazydeltabase:
                             delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
                         else:
                             delta_base_reuse_policy = DELTA_BASE_REUSE_NO
                     self._adding_group = True
                     empty = True
                     try:
                         with self._writing(transaction):
                             write_debug = None
                             if self._debug_delta:
                                 write_debug = transaction._report
                             # One delta computer is shared by every revision of the group.
                             deltacomputer = deltautil.deltacomputer(
                                 self,
                                 write_debug=write_debug,
                                 debug_info=debug_info,
                             )
                             # loop through our set of deltas
                             for data in deltas:
                                 (
                                     node,
                                     p1,
                                     p2,
                                     linknode,
                                     deltabase,
                                     delta,
                                     flags,
                                     sidedata,
                                 ) = data
                                 link = linkmapper(linknode)
                                 flags = flags or REVIDX_DEFAULT_FLAGS
                                 rev = self.index.get_rev(node)
                                 if rev is not None:
                                     # this can happen if two branches make the same change
                                     self._nodeduplicatecallback(transaction, rev)
                                     if duplicaterevisioncb:
                                         duplicaterevisioncb(self, rev)
                                     empty = False
                                     continue
                                 # Both parents and the delta base must already be stored.
                                 for p in (p1, p2):
                                     if not self.index.has_node(p):
                                         raise error.LookupError(
                                             p, self.radix, _(b'unknown parent')
                                         )
                                 if not self.index.has_node(deltabase):
                                     raise error.LookupError(
                                         deltabase, self.display_id, _(b'unknown delta base')
                                     )
                                 baserev = self.rev(deltabase)
                                 if baserev != nullrev and self.iscensored(baserev):
                                     # if base is censored, delta must be full replacement in a
                                     # single patch operation
                                     hlen = struct.calcsize(b">lll")
                                     oldlen = self.rawsize(baserev)
                                     newlen = len(delta) - hlen
                                     if delta[:hlen] != mdiff.replacediffheader(
                                         oldlen, newlen
                                     ):
                                         raise error.CensoredBaseError(
                                             self.display_id, self.node(baserev)
                                         )
                                 # Only when no flags were supplied: mark the revision as
                                 # censored if the delta says it produces censored content.
                                 if not flags and self._peek_iscensored(baserev, delta):
                                     flags |= REVIDX_ISCENSORED
                                 # We assume consumers of addrevisioncb will want to retrieve
                                 # the added revision, which will require a call to
                                 # revision(). revision() will fast path if there is a cache
                                 # hit. So, we tell _addrevision() to always cache in this case.
                                 # We're only using addgroup() in the context of changegroup
                                 # generation so the revision data can always be handled as raw
                                 # by the flagprocessor.
                                 rev = self._addrevision(
                                     node,
                                     None,
                                     transaction,
                                     link,
                                     p1,
                                     p2,
                                     flags,
                                     (baserev, delta, delta_base_reuse_policy),
                                     alwayscache=alwayscache,
                                     deltacomputer=deltacomputer,
                                     sidedata=sidedata,
                                 )
                                 if addrevisioncb:
                                     addrevisioncb(self, rev)
                                 empty = False
                     finally:
                         # Always clear the guard so a failed group does not block later
                         # calls.
                         self._adding_group = False
                     return not empty
                 def iscensored(self, rev):
                     """Tell whether revision ``rev`` carries the censored flag.

                     Always False for revlogs that are not censorable; otherwise the
                     result is the revision's flags masked with REVIDX_ISCENSORED
                     (truthy when the flag is set).
                     """
                     if self._censorable:
                         return self.flags(rev) & REVIDX_ISCENSORED
                     return False
                 def _peek_iscensored(self, baserev, delta):
                     """Quickly check if a delta produces a censored revision."""
                     if not self._censorable:
                         return False
                     return storageutil.deltaiscensored(delta, baserev, self.rawsize)
                 def getstrippoint(self, minlink):
                     """Find the minimum rev that must be stripped to strip linkrev ``minlink``.

                     Returns a tuple containing the minimum rev and a set of all revs that
                     have linkrevs that will be broken by this strip.
                     """
                     tiprev = len(self) - 1
                     heads = self.headrevs()
                     return storageutil.resolvestripinfo(
                         minlink, tiprev, heads, self.linkrev, self.parentrevs
                     )
                 def strip(self, minlink, transaction):
                     """truncate the revlog on the first revision with a linkrev >= minlink

                     This function is called when we're stripping revision minlink and
                     its descendants from the repository.

                     We have to remove all revisions with linkrev >= minlink, because
                     the equivalent changelog revisions will be renumbered after the
                     strip.

                     So we truncate the revlog on the first of these revisions, and
                     trust that the caller has saved the revisions that shouldn't be
                     removed and that it'll re-add them after this truncation.

                     Nothing is done (and None is returned) when the revlog is empty or
                     when the strip point is past the last revision.
                     """
                     if len(self) == 0:
                         return
                     # Only the first revision to strip is needed here; the set of broken
                     # linkrevs returned alongside it is ignored.
                     rev, _ = self.getstrippoint(minlink)
                     if rev == len(self):
                         return
                     # first truncate the files on disk
                     data_end = self.start(rev)
                     if not self._inline:
                         transaction.add(self._datafile, data_end)
                         end = rev * self.index.entry_size
                     else:
                         # Inline revlog: index entries and data share one file.
                         end = data_end + (rev * self.index.entry_size)
                     if self._sidedatafile:
                         sidedata_end = self.sidedata_cut_off(rev)
                         transaction.add(self._sidedatafile, sidedata_end)
                     transaction.add(self._indexfile, end)
                     if self._docket is not None:
                         # XXX we could, leverage the docket while stripping. However it is
                         # not powerful enough at the time of this comment
                         # NOTE(review): `sidedata_end` is only bound when a sidedata file
                         # is configured; this branch assumes a docket implies one.
                         self._docket.index_end = end
                         self._docket.data_end = data_end
                         self._docket.sidedata_end = sidedata_end
                         self._docket.write(transaction, stripping=True)
                     # then reset internal state in memory to forget those revisions
                     self._revisioncache = None
                     self._chaininfocache = util.lrucachedict(500)
                     self._segmentfile.clear_cache()
                     self._segmentfile_sidedata.clear_cache()
                     # NOTE(review): the `[rev:-1]` slice presumably relies on the index
                     # type's own deletion semantics to drop `rev` and everything after
                     # it -- confirm against the index implementation before touching.
                     del self.index[rev:-1]
                 def checksize(self):
                     """Check size of index and data files

                     return a (dd, di) tuple.
                     - dd: extra bytes for the "data" file
                     - di: extra bytes for the "index" file

                     A healthy revlog will return (0, 0). A file that does not exist
                     counts as having no extra bytes.
                     """
                     # The data file should end exactly where the last revision ends.
                     expected = 0
                     if len(self):
                         expected = max(0, self.end(len(self) - 1))
                     try:
                         with self._datafp() as f:
                             f.seek(0, io.SEEK_END)
                             actual = f.tell()
                         dd = actual - expected
                     except FileNotFoundError:
                         dd = 0
                     try:
                         # Use a context manager, like for the data file above, so the
                         # index handle is released even when seek()/tell() fails.
                         with self.opener(self._indexfile) as f:
                             f.seek(0, io.SEEK_END)
                             actual = f.tell()
                         s = self.index.entry_size
                         # Bytes left over after the last complete index entry.
                         i = max(0, actual // s)
                         di = actual - (i * s)
                         if self._inline:
                             # Inline revlog: the index file also holds the revision
                             # data, so account for it and report everything as `di`.
                             databytes = 0
                             for r in self:
                                 databytes += max(0, self.length(r))
                             dd = 0
                             di = actual - len(self) * s - databytes
                     except FileNotFoundError:
                         di = 0
                     return (dd, di)
                 def files(self):
                     """List the file paths making up this revlog.

                     The index file always comes first. Without a docket file, the data
                     file follows unless the revlog is inline. With a docket file, the
                     docket itself, the old index files, the data file (if it holds any
                     data), the old data files, the sidedata file (if it holds any data)
                     and the old sidedata files follow, in that order.
                     """
                     paths = [self._indexfile]
                     if self._docket_file is not None:
                         paths.append(self._docket_file)
                         docket = self._docket
                         paths += docket.old_index_filepaths(include_empty=False)
                         if docket.data_end:
                             paths.append(self._datafile)
                         paths += docket.old_data_filepaths(include_empty=False)
                         if docket.sidedata_end:
                             paths.append(self._sidedatafile)
                         paths += docket.old_sidedata_filepaths(include_empty=False)
                     elif not self._inline:
                         paths.append(self._datafile)
                     return paths
                 def emitrevisions(
                     self,
                     nodes,
                     nodesorder=None,
                     revisiondata=False,
                     assumehaveparentrevisions=False,
                     deltamode=repository.CG_DELTAMODE_STD,
                     sidedata_helpers=None,
                     debug_info=None,
                 ):
                     """Emit revision deltas for ``nodes`` through storageutil.emitrevisions().

                     ``nodesorder`` must be b'nodes', b'storage', b'linear' or None,
                     otherwise ProgrammingError is raised. None becomes b'storage' when
                     the revlog does not use generaldelta. When the revlog does not store
                     delta chains, any ``deltamode`` other than CG_DELTAMODE_PREV is
                     replaced by CG_DELTAMODE_FULL. The remaining arguments are passed
                     through unchanged.
                     """
                     accepted_orders = (b'nodes', b'storage', b'linear', None)
                     if nodesorder not in accepted_orders:
                         msg = b'unhandled value for nodesorder: %s' % nodesorder
                         raise error.ProgrammingError(msg)
                     if nodesorder is None:
                         if not self._generaldelta:
                             nodesorder = b'storage'
                     if not (
                         self._storedeltachains
                         or deltamode == repository.CG_DELTAMODE_PREV
                     ):
                         deltamode = repository.CG_DELTAMODE_FULL
                     # callbacks and options handed to the generic emitter
                     emitter_options = dict(
                         deltaparentfn=self.deltaparent,
                         candeltafn=self._candelta,
                         rawsizefn=self.rawsize,
                         revdifffn=self.revdiff,
                         flagsfn=self.flags,
                         deltamode=deltamode,
                         revisiondata=revisiondata,
                         assumehaveparentrevisions=assumehaveparentrevisions,
                         sidedata_helpers=sidedata_helpers,
                         debug_info=debug_info,
                     )
                     return storageutil.emitrevisions(
                         self, nodes, nodesorder, revlogrevisiondelta, **emitter_options
                     )
                 # Delta reuse policies accepted by clone().
                 DELTAREUSEALWAYS = b'always'
                 DELTAREUSESAMEREVS = b'samerevs'
                 DELTAREUSENEVER = b'never'
                 DELTAREUSEFULLADD = b'fulladd'
                 # Every valid policy, used to validate the `deltareuse` argument.
                 DELTAREUSEALL = {
                     DELTAREUSEALWAYS,
                     DELTAREUSESAMEREVS,
                     DELTAREUSENEVER,
                     DELTAREUSEFULLADD,
                 }
                 def clone(
                     self,
                     tr,
                     destrevlog,
                     addrevisioncb=None,
                     deltareuse=DELTAREUSESAMEREVS,
                     forcedeltabothparents=None,
                     sidedata_helpers=None,
                 ):
                     """Copy this revlog to another, possibly with format changes.

                     The destination revlog will contain the same revisions and nodes.
                     However, it may not be bit-for-bit identical due to e.g. delta encoding
                     differences.

                     The ``deltareuse`` argument controls how deltas from the existing
                     revlog are preserved in the destination revlog. The argument can have
                     the following values:

                     DELTAREUSEALWAYS
                        Deltas will always be reused (if possible), even if the destination
                        revlog would not select the same revisions for the delta. This is the
                        fastest mode of operation.
                     DELTAREUSESAMEREVS
                        Deltas will be reused if the destination revlog would pick the same
                        revisions for the delta. This mode strikes a balance between speed
                        and optimization.
                     DELTAREUSENEVER
                        Deltas will never be reused. This is the slowest mode of execution.
                        This mode can be used to recompute deltas (e.g. if the diff/delta
                        algorithm changes).
                     DELTAREUSEFULLADD
                        Revisions will be re-added as if they were new content. This is
                        slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
                        e.g. large file detection and handling.

                     Delta computation can be slow, so the choice of delta reuse policy can
                     significantly affect run time.

                     The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
                     two extremes. Deltas will be reused if they are appropriate. But if the
                     delta could choose a better revision, it will do so. This means if you
                     are converting a non-generaldelta revlog to a generaldelta revlog,
                     deltas will be recomputed if the delta's parent isn't a parent of the
                     revision.

                     In addition to the delta policy, the ``forcedeltabothparents``
                     argument controls whether to force compute deltas against both parents
                     for merges. When it is not set (or false), the destination's current
                     setting is used.

                     See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
                     `sidedata_helpers`.

                     Raises ValueError for an unknown ``deltareuse`` value, a non-empty
                     destination, or filtered revisions on either side. The destination's
                     delta settings are restored when the copy ends, even on error.
                     """
                     if deltareuse not in self.DELTAREUSEALL:
                         msg = _(b'value for deltareuse invalid: %s') % deltareuse
                         raise ValueError(msg)
                     if len(destrevlog):
                         raise ValueError(_(b'destination revlog is not empty'))
                     if getattr(self, 'filteredrevs', None):
                         raise ValueError(_(b'source revlog has filtered revisions'))
                     if getattr(destrevlog, 'filteredrevs', None):
                         raise ValueError(_(b'destination revlog has filtered revisions'))
                     # lazydelta and lazydeltabase control whether to reuse a cached
                     # delta, if possible. Remember the destination's settings so they
                     # can be put back afterwards.
                     saved_lazydelta = destrevlog._lazydelta
                     saved_lazydeltabase = destrevlog._lazydeltabase
                     saved_bothparents = destrevlog._deltabothparents
                     # policy -> (lazydeltabase, lazydelta); DELTAREUSEFULLADD is absent
                     # on purpose and leaves both settings untouched.
                     lazy_by_policy = {
                         self.DELTAREUSEALWAYS: (True, True),
                         self.DELTAREUSESAMEREVS: (False, True),
                         self.DELTAREUSENEVER: (False, False),
                     }
                     try:
                         lazy = lazy_by_policy.get(deltareuse)
                         if lazy is not None:
                             destrevlog._lazydeltabase, destrevlog._lazydelta = lazy
                         destrevlog._deltabothparents = (
                             forcedeltabothparents or saved_bothparents
                         )
                         with self.reading():
                             self._clone(
                                 tr,
                                 destrevlog,
                                 addrevisioncb,
                                 deltareuse,
                                 forcedeltabothparents,
                                 sidedata_helpers,
                             )
                     finally:
                         destrevlog._lazydelta = saved_lazydelta
                         destrevlog._lazydeltabase = saved_lazydeltabase
                         destrevlog._deltabothparents = saved_bothparents
                 def _clone(
                     self,
                     tr,
                     destrevlog,
                     addrevisioncb,
                     deltareuse,
                     forcedeltabothparents,
                     sidedata_helpers,
                 ):
                     """perform the core duty of `revlog.clone` after parameter processing

                     Every revision of ``self`` is read, in revision order, and appended
                     to ``destrevlog`` within transaction ``tr``.

                     ``deltareuse`` selects the copy strategy: with
                     ``DELTAREUSEFULLADD`` each revision goes through
                     ``destrevlog.addrevision()``; otherwise it is handed to
                     ``destrevlog._addrevision()``, passing the stored delta along when
                     ``destrevlog._lazydelta`` is set and the revision has a delta
                     parent.

                     ``sidedata_helpers``, when not None, is run over the sidedata of
                     each revision and may add or remove revision flags.

                     ``addrevisioncb``, when set, is called as
                     ``addrevisioncb(self, rev, node)`` after each revision is added.

                     ``forcedeltabothparents`` is part of the signature but is not
                     consulted in this method.
                     """
                     write_debug = None
                     if self._debug_delta:
                         write_debug = tr._report
                     deltacomputer = deltautil.deltacomputer(
                         destrevlog,
                         write_debug=write_debug,
                     )
                     index = self.index
                     for rev in self:
                         entry = index[rev]
                         # Some classes override linkrev to take filtered revs into
                         # account. Use raw entry from index.
                         flags = entry[0] & 0xFFFF
                         linkrev = entry[4]
                         p1 = index[entry[5]][7]
                         p2 = index[entry[6]][7]
                         node = entry[7]
                         # (Possibly) reuse the delta from the revlog if allowed and
                         # the revlog chunk is a delta.
                         cachedelta = None
                         rawtext = None
                         if deltareuse == self.DELTAREUSEFULLADD:
                             text = self._revisiondata(rev)
                             sidedata = self.sidedata(rev)
                             if sidedata_helpers is not None:
                                 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                                     self, sidedata_helpers, sidedata, rev
                                 )
                                 # Explicit parentheses: `&` binds tighter than `|`, so
                                 # without them the removal mask would only be applied
                                 # to the flags being added, never to existing ones.
                                 flags = (flags | new_flags[0]) & ~new_flags[1]
                             destrevlog.addrevision(
                                 text,
                                 tr,
                                 linkrev,
                                 p1,
                                 p2,
                                 cachedelta=cachedelta,
                                 node=node,
                                 flags=flags,
                                 deltacomputer=deltacomputer,
                                 sidedata=sidedata,
                             )
                         else:
                             if destrevlog._lazydelta:
                                 dp = self.deltaparent(rev)
                                 if dp != nullrev:
                                     cachedelta = (dp, bytes(self._chunk(rev)))
                             # The full raw text is only needed when no delta is reused.
                             if not cachedelta:
                                 rawtext = self._revisiondata(rev)
                             sidedata = self.sidedata(rev)
                             if sidedata_helpers is not None:
                                 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                                     self, sidedata_helpers, sidedata, rev
                                 )
                                 # See the note in the other branch about precedence.
                                 flags = (flags | new_flags[0]) & ~new_flags[1]
                             with destrevlog._writing(tr):
                                 destrevlog._addrevision(
                                     node,
                                     rawtext,
                                     tr,
                                     linkrev,
                                     p1,
                                     p2,
                                     flags,
                                     cachedelta,
                                     deltacomputer=deltacomputer,
                                     sidedata=sidedata,
                                 )
                         if addrevisioncb:
                             addrevisioncb(self, rev, node)
                 def censorrevision(self, tr, censornode, tombstone=b''):
                     """Censor ``censornode``, dispatching on the revlog format version.

                     Version 0 revlogs are rejected with ``error.RevlogError``. Version 1
                     is delegated to ``rewrite.v1_censor`` and every other version to
                     ``rewrite.v2_censor``; ``tr``, ``censornode`` and ``tombstone`` are
                     forwarded unchanged.
                     """
                     version = self._format_version
                     if version == REVLOGV0:
                         msg = _(b'cannot censor with version %d revlogs')
                         raise error.RevlogError(msg % version)
                     if version == REVLOGV1:
                         censor = rewrite.v1_censor
                     else:
                         censor = rewrite.v2_censor
                     censor(self, tr, censornode, tombstone)
                 def verifyintegrity(self, state):
                     """Check this revlog for corruption, yielding each problem found.

                     This is a generator of ``revlogproblem`` instances.

                     ``state`` is a mapping shared with the caller. The keys
                     ``expectedversion`` and ``erroroncensored`` (and, optionally,
                     ``skipflags``) are read from it. ``skipread`` and ``safe_renamed``
                     are reset to empty sets before revisions are walked, and a node is
                     added to ``skipread`` when reading it raised (censored data that is
                     reported as an error, or any other exception).
                     """
                     data_excess, index_excess = self.checksize()
                     if data_excess:
                         yield revlogproblem(
                             error=_(b'data length off by %d bytes') % data_excess
                         )
                     if index_excess:
                         yield revlogproblem(
                             error=_(b'index contains %d extra bytes') % index_excess
                         )
                     # The caller (the verifier) dictates the expected revlog version.
                     found_version = self._format_version
                     if found_version != state[b'expectedversion']:
                         template = _(b"warning: '%s' uses revlog format %d; expected %d")
                         details = (
                             self.display_id,
                             found_version,
                             state[b'expectedversion'],
                         )
                         yield revlogproblem(warning=template % details)
                     state[b'skipread'] = set()
                     state[b'safe_renamed'] = set()
                     for rev in self:
                         node = self.node(rev)
                         # Content verification has to cope with four situations:
                         #
                         #   common: the most common case
                         #   rename: the revision records a rename
                         #   meta: the content starts with b'\1\n' (the metadata header
                         #         defined in filelog.py) but there is no rename
                         #   ext: the content is stored externally
                         #
                         # They can be told apart as follows:
                         #
                         #                       | common | rename | meta  | ext
                         #  -------------------------------------------------------
                         #   flags()             | 0      | 0      | 0     | not 0
                         #   renamed()           | False  | True   | False | ?
                         #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
                         #
                         # Here "rawtext" is the raw text kept in the revlog data, as
                         # returned by "rawdata(rev)", and "text" is "revision(rev)".
                         #
                         # Three lengths are physically stored:
                         #  1. L1: rawsize, kept in the revlog index
                         #  2. L2: len(rawtext), kept in the revlog data
                         #  3. L3: len(text), kept in the revlog data if flags==0, or
                         #     possibly somewhere else if flags!=0
                         #
                         # L1 must match L2, while L3 is allowed to differ. Whether
                         # "text" takes part in the commit hash depends on the flag
                         # processors (see flagutil.addflagprocessor).
                         #
                         #              | common  | rename | meta  | ext
                         # -------------------------------------------------
                         #    rawsize() | L1      | L1     | L1    | L1
                         #       size() | L1      | L2-LM  | L1(*) | L1 (?)
                         # len(rawtext) | L2      | L2     | L2    | L2
                         #    len(text) | L2      | L2     | L2    | L3
                         #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
                         #
                         # LM:  length of metadata, depending on rawtext
                         # (*): not ideal, see comment in filelog.size
                         # (?): could be "- len(meta)" if the resolved content has
                         #      rename metadata
                         #
                         # The checks to perform are therefore:
                         #  1. length: L1 == L2, whatever the case.
                         #  2. hash: computed from "text" (external) or from "rawtext"
                         #     (in revlog), depending on the flag processor.
                         try:
                             wanted_skip = state.get(b'skipflags', 0)
                             if wanted_skip:
                                 skipflags = wanted_skip & self.flags(rev)
                             else:
                                 skipflags = wanted_skip
                             _verify_revision(self, skipflags, state, node)
                             indexed_size = self.rawsize(rev)
                             actual_size = len(self.rawdata(node))
                             if indexed_size != actual_size:
                                 template = _(b'unpacked size is %d, %d expected')
                                 yield revlogproblem(
                                     error=template % (actual_size, indexed_size),
                                     node=node,
                                 )
                         except error.CensoredNodeError:
                             # Censored data is only a problem when the caller says so.
                             if state[b'erroroncensored']:
                                 yield revlogproblem(
                                     error=_(b'censored file data'), node=node
                                 )
                                 state[b'skipread'].add(node)
                         except Exception as exc:
                             template = _(b'unpacking %s: %s')
                             details = (short(node), stringutil.forcebytestr(exc))
                             yield revlogproblem(error=template % details, node=node)
                             state[b'skipread'].add(node)
                 def storageinfo(
                     self,
                     exclusivefiles=False,
                     sharedfiles=False,
                     revisionscount=False,
                     trackedsize=False,
                     storedsize=False,
                 ):
                     """Return a dict holding the storage details that were asked for.

                     Each truthy argument adds one bytes key to the result:

                     * ``exclusivefiles``: list of ``(opener, path)`` pairs, namely the
                       index file plus the data file when the revlog is not inline
                     * ``sharedfiles``: always an empty list
                     * ``revisionscount``: ``len(self)``
                     * ``trackedsize``: sum of ``rawsize()`` over all revisions
                     * ``storedsize``: sum of the on-disk size of ``self.files()``

                     With no argument set, the result is an empty dict.
                     """
                     info = {}
                     if exclusivefiles:
                         owned = [(self.opener, self._indexfile)]
                         if not self._inline:
                             owned.append((self.opener, self._datafile))
                         info[b'exclusivefiles'] = owned
                     if sharedfiles:
                         info[b'sharedfiles'] = []
                     if revisionscount:
                         info[b'revisionscount'] = len(self)
                     if trackedsize:
                         info[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)
                     if storedsize:
                         on_disk = 0
                         for path in self.files():
                             on_disk += self.opener.stat(path).st_size
                         info[b'storedsize'] = on_disk
                     return info
                 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
                     """Generate sidedata for revisions ``startrev`` to ``endrev`` inclusive.

                     ``helpers`` is the sidedata helpers sequence handed to
                     ``sidedatautil.run_sidedata_helpers``. Nothing is done when this
                     revlog has no sidedata support, or when both ``helpers[1]`` and
                     ``helpers[2]`` are empty.

                     For each revision the helpers are run on an empty sidedata mapping;
                     the result is serialized (and stored compressed when that is
                     smaller), appended at the end of the sidedata file, and the matching
                     index entries are then rewritten in place.

                     Raises ``error.Abort`` when a revision in the range already has
                     sidedata, as rewriting existing sidedata is not supported yet.
                     """
                     if not self.hassidedata:
                         return
                     # revlog formats with sidedata support do not support inline
                     assert not self._inline
                     if not helpers[1] and not helpers[2]:
                         # Nothing to generate or remove
                         return
                     new_entries = []
                     # append the new sidedata
                     with self._writing(transaction):
                         ifh, _dfh, sdfh = self._writinghandles
                         # The append offset is read from the sidedata handle, so that
                         # is the handle that must be positioned at the end of the
                         # existing sidedata (the data file handle is not used here).
                         sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                         current_offset = sdfh.tell()
                         for rev in range(startrev, endrev + 1):
                             entry = self.index[rev]
                             new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                                 store=self,
                                 sidedata_helpers=helpers,
                                 sidedata={},
                                 rev=rev,
                             )
                             serialized_sidedata = sidedatautil.serialize_sidedata(
                                 new_sidedata
                             )
                             sidedata_compression_mode = COMP_MODE_INLINE
                             if serialized_sidedata and self.hassidedata:
                                 sidedata_compression_mode = COMP_MODE_PLAIN
                                 h, comp_sidedata = self.compress(serialized_sidedata)
                                 # Compare one-byte slices, not `comp_sidedata[0]`:
                                 # indexing bytes yields an int on Python 3, which never
                                 # equals a bytes value.
                                 # NOTE(review): assumes default_compression_header is a
                                 # one-byte bytes value -- confirm against the docket.
                                 if (
                                     h != b'u'
                                     and comp_sidedata[0:1] != b'\0'
                                     and len(comp_sidedata) < len(serialized_sidedata)
                                 ):
                                     assert not h
                                     if (
                                         comp_sidedata[0:1]
                                         == self._docket.default_compression_header
                                     ):
                                         sidedata_compression_mode = COMP_MODE_DEFAULT
                                     else:
                                         sidedata_compression_mode = COMP_MODE_INLINE
                                     serialized_sidedata = comp_sidedata
                             if entry[8] != 0 or entry[9] != 0:
                                 # rewriting entries that already have sidedata is not
                                 # supported yet, because it introduces garbage data in the
                                 # revlog.
                                 msg = b"rewriting existing sidedata is not supported yet"
                                 raise error.Abort(msg)
                             # Apply (potential) flags to add and to remove after running
                             # the sidedata helpers. The parentheses matter: `&` binds
                             # tighter than `|`, so without them the removal mask would
                             # never clear a flag already present in the entry.
                             new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
                             entry_update = (
                                 current_offset,
                                 len(serialized_sidedata),
                                 new_offset_flags,
                                 sidedata_compression_mode,
                             )
                             # the sidedata computation might have moved the file cursors
                             # around
                             sdfh.seek(current_offset, os.SEEK_SET)
                             sdfh.write(serialized_sidedata)
                             new_entries.append(entry_update)
                             current_offset += len(serialized_sidedata)
                             self._docket.sidedata_end = sdfh.tell()
                         # rewrite the new index entries
                         ifh.seek(startrev * self.index.entry_size)
                         for i, e in enumerate(new_entries):
                             rev = startrev + i
                             self.index.replace_sidedata_info(rev, *e)
                             packed = self.index.entry_binary(rev)
                             if rev == 0 and self._docket is None:
                                 # Without a docket, the first index entry also carries
                                 # the packed format header.
                                 header = self._format_flags | self._format_version
                                 header = self.index.pack_header(header)
                                 packed = header + packed
                             ifh.write(packed)