revlog: Extract low-level random-access file read caching logic...
Simon Sapin
r48218:e0a314bc default
@@ -0,0 +1,138 @@
1 # Copyright Mercurial Contributors
2 #
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
5
6 import contextlib
7
8 from ..i18n import _
9 from .. import (
10 error,
11 util,
12 )
13
14
15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
16
17 PARTIAL_READ_MSG = _(
18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
19 )
20
21
22 def _is_power_of_two(n):
23 return (n & (n - 1) == 0) and n != 0
24
25
26 class randomaccessfile(object):
27 """Accessing arbitrary chuncks of data within a file, with some caching"""
28
29 def __init__(
30 self,
31 opener,
32 filename,
33 default_cached_chunk_size,
34 initial_cache=None,
35 ):
36 # Required by bitwise manipulation below
37 assert _is_power_of_two(default_cached_chunk_size)
38
39 self.opener = opener
40 self.filename = filename
41 self.default_cached_chunk_size = default_cached_chunk_size
42 self.writing_handle = None # This is set from revlog.py
43 self._cached_chunk = b''
44 self._cached_chunk_position = 0 # Offset from the start of the file
45 if initial_cache:
46 self._cached_chunk_position, self._cached_chunk = initial_cache
47
48 def clear_cache(self):
49 self._cached_chunk = b''
50 self._cached_chunk_position = 0
51
52 def _open(self, mode=b'r'):
53 """Return a file object"""
54 return self.opener(self.filename, mode=mode)
55
56 @contextlib.contextmanager
57 def _open_read(self, existing_file_obj=None):
58 """File object suitable for reading data"""
59 # Use explicit file handle, if given.
60 if existing_file_obj is not None:
61 yield existing_file_obj
62
63 # Use a file handle being actively used for writes, if available.
64 # There is some danger to doing this because reads will seek the
65 # file. However, revlog._writeentry performs a SEEK_END before all
66 # writes, so we should be safe.
67 elif self.writing_handle:
68 yield self.writing_handle
69
70 # Otherwise open a new file handle.
71 else:
72 with self._open() as fp:
73 yield fp
74
75 def read_chunk(self, offset, length, existing_file_obj=None):
76 """Read a chunk of bytes from the file.
77
78 Accepts an absolute offset, length to read, and an optional existing
79 file handle to read from.
80
81 If an existing file handle is passed, it will be seeked and the
82 original seek position will NOT be restored.
83
84 Returns a str or buffer of raw byte data.
85
86 Raises if the requested number of bytes could not be read.
87 """
88 end = offset + length
89 cache_start = self._cached_chunk_position
90 cache_end = cache_start + len(self._cached_chunk)
91 # Is the requested chunk within the cache?
92 if cache_start <= offset and end <= cache_end:
93 if cache_start == offset and end == cache_end:
94 return self._cached_chunk # avoid a copy
95 relative_start = offset - cache_start
96 return util.buffer(self._cached_chunk, relative_start, length)
97
98 return self._read_and_update_cache(offset, length, existing_file_obj)
99
100 def _read_and_update_cache(self, offset, length, existing_file_obj=None):
101 # Cache data both forward and backward around the requested
102 # data, in a fixed size window. This helps speed up operations
103 # involving reading the revlog backwards.
104 real_offset = offset & ~(self.default_cached_chunk_size - 1)
105 real_length = (
106 (offset + length + self.default_cached_chunk_size)
107 & ~(self.default_cached_chunk_size - 1)
108 ) - real_offset
109 with self._open_read(existing_file_obj) as file_obj:
110 file_obj.seek(real_offset)
111 data = file_obj.read(real_length)
112
113 self._add_cached_chunk(real_offset, data)
114
115 relative_offset = offset - real_offset
116 got = len(data) - relative_offset
117 if got < length:
118 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
119 raise error.RevlogError(message)
120
121 if offset != real_offset or real_length != length:
122 return util.buffer(data, relative_offset, length)
123 return data
124
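The masking in _read_and_update_cache rounds the read window to multiples of default_cached_chunk_size, which is why the constructor asserts it is a power of two. A minimal worked sketch of the arithmetic, assuming a hypothetical 64 KiB chunk size:

    chunk_size = 65536                        # must be a power of two
    offset, length = 70000, 100               # the caller's request
    real_offset = offset & ~(chunk_size - 1)  # 65536: aligned start
    real_end = (offset + length + chunk_size) & ~(chunk_size - 1)
    real_length = real_end - real_offset      # 65536: aligned window size
    # the aligned window always covers the requested span
    assert real_offset <= offset and offset + length <= real_end

So the 100 requested bytes at offset 70000 are served from one aligned 64 KiB window covering [65536, 131072).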
125 def _add_cached_chunk(self, offset, data):
126 """Add to or replace the cached data chunk.
127
128 Accepts an absolute offset and the data that is at that location.
129 """
130 if (
131 self._cached_chunk_position + len(self._cached_chunk) == offset
132 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
133 ):
134 # add to existing cache
135 self._cached_chunk += data
136 else:
137 self._cached_chunk = data
138 self._cached_chunk_position = offset
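For context, a hedged usage sketch of the new class (module path as introduced by this changeset; the opener below is a hypothetical stand-in for the vfs callable that real callers pass in):

    import io
    from mercurial.revlogutils.randomaccessfile import randomaccessfile

    def fake_opener(filename, mode=b'r'):
        # stand-in vfs: every open returns the same 200,000 dummy bytes
        return io.BytesIO(b'x' * 200000)

    raf = randomaccessfile(fake_opener, b'data/some.d', 65536)
    first = raf.read_chunk(70000, 100)   # opens, reads one aligned window, caches it
    second = raf.read_chunk(70010, 50)   # served from the cached window, no file access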
@@ -1,627 +1,630 @@
1 # changelog.py - changelog class for mercurial
1 # changelog.py - changelog class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 from .i18n import _
10 from .i18n import _
11 from .node import (
11 from .node import (
12 bin,
12 bin,
13 hex,
13 hex,
14 )
14 )
15 from .thirdparty import attr
15 from .thirdparty import attr
16
16
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 metadata,
20 metadata,
21 pycompat,
21 pycompat,
22 revlog,
22 revlog,
23 )
23 )
24 from .utils import (
24 from .utils import (
25 dateutil,
25 dateutil,
26 stringutil,
26 stringutil,
27 )
27 )
28 from .revlogutils import (
28 from .revlogutils import (
29 constants as revlog_constants,
29 constants as revlog_constants,
30 flagutil,
30 flagutil,
31 )
31 )
32
32
33 _defaultextra = {b'branch': b'default'}
33 _defaultextra = {b'branch': b'default'}
34
34
35
35
36 def _string_escape(text):
36 def _string_escape(text):
37 """
37 """
38 >>> from .pycompat import bytechr as chr
38 >>> from .pycompat import bytechr as chr
39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
41 >>> s
41 >>> s
42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
43 >>> res = _string_escape(s)
43 >>> res = _string_escape(s)
44 >>> s == _string_unescape(res)
44 >>> s == _string_unescape(res)
45 True
45 True
46 """
46 """
47 # subset of the string_escape codec
47 # subset of the string_escape codec
48 text = (
48 text = (
49 text.replace(b'\\', b'\\\\')
49 text.replace(b'\\', b'\\\\')
50 .replace(b'\n', b'\\n')
50 .replace(b'\n', b'\\n')
51 .replace(b'\r', b'\\r')
51 .replace(b'\r', b'\\r')
52 )
52 )
53 return text.replace(b'\0', b'\\0')
53 return text.replace(b'\0', b'\\0')
54
54
55
55
56 def _string_unescape(text):
56 def _string_unescape(text):
57 if b'\\0' in text:
57 if b'\\0' in text:
58 # fix up \0 without getting into trouble with \\0
58 # fix up \0 without getting into trouble with \\0
59 text = text.replace(b'\\\\', b'\\\\\n')
59 text = text.replace(b'\\\\', b'\\\\\n')
60 text = text.replace(b'\\0', b'\0')
60 text = text.replace(b'\\0', b'\0')
61 text = text.replace(b'\n', b'')
61 text = text.replace(b'\n', b'')
62 return stringutil.unescapestr(text)
62 return stringutil.unescapestr(text)
63
63
64
64
65 def decodeextra(text):
65 def decodeextra(text):
66 """
66 """
67 >>> from .pycompat import bytechr as chr
67 >>> from .pycompat import bytechr as chr
68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
69 ... ).items())
69 ... ).items())
70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
72 ... b'baz': chr(92) + chr(0) + b'2'})
72 ... b'baz': chr(92) + chr(0) + b'2'})
73 ... ).items())
73 ... ).items())
74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
75 """
75 """
76 extra = _defaultextra.copy()
76 extra = _defaultextra.copy()
77 for l in text.split(b'\0'):
77 for l in text.split(b'\0'):
78 if l:
78 if l:
79 k, v = _string_unescape(l).split(b':', 1)
79 k, v = _string_unescape(l).split(b':', 1)
80 extra[k] = v
80 extra[k] = v
81 return extra
81 return extra
82
82
83
83
84 def encodeextra(d):
84 def encodeextra(d):
85 # keys must be sorted to produce a deterministic changelog entry
85 # keys must be sorted to produce a deterministic changelog entry
86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
87 return b"\0".join(items)
87 return b"\0".join(items)
88
88
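A hedged doctest-style illustration of the output (one escaped key:value pair per entry, NUL-separated, keys sorted):

    >>> encodeextra({b'close': b'1', b'branch': b'stable'})
    'branch:stable\x00close:1'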
89
89
90 def stripdesc(desc):
90 def stripdesc(desc):
91 """strip trailing whitespace and leading and trailing empty lines"""
91 """strip trailing whitespace and leading and trailing empty lines"""
92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
93
93
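A hedged doctest-style illustration:

    >>> stripdesc(b'\nfirst line  \nsecond line\t\n\n')
    'first line\nsecond line'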
94
94
95 class appender(object):
95 class appender(object):
96 """the changelog index must be updated last on disk, so we use this class
96 """the changelog index must be updated last on disk, so we use this class
97 to delay writes to it"""
97 to delay writes to it"""
98
98
99 def __init__(self, vfs, name, mode, buf):
99 def __init__(self, vfs, name, mode, buf):
100 self.data = buf
100 self.data = buf
101 fp = vfs(name, mode)
101 fp = vfs(name, mode)
102 self.fp = fp
102 self.fp = fp
103 self.offset = fp.tell()
103 self.offset = fp.tell()
104 self.size = vfs.fstat(fp).st_size
104 self.size = vfs.fstat(fp).st_size
105 self._end = self.size
105 self._end = self.size
106
106
107 def end(self):
107 def end(self):
108 return self._end
108 return self._end
109
109
110 def tell(self):
110 def tell(self):
111 return self.offset
111 return self.offset
112
112
113 def flush(self):
113 def flush(self):
114 pass
114 pass
115
115
116 @property
116 @property
117 def closed(self):
117 def closed(self):
118 return self.fp.closed
118 return self.fp.closed
119
119
120 def close(self):
120 def close(self):
121 self.fp.close()
121 self.fp.close()
122
122
123 def seek(self, offset, whence=0):
123 def seek(self, offset, whence=0):
124 '''virtual file offset spans real file and data'''
124 '''virtual file offset spans real file and data'''
125 if whence == 0:
125 if whence == 0:
126 self.offset = offset
126 self.offset = offset
127 elif whence == 1:
127 elif whence == 1:
128 self.offset += offset
128 self.offset += offset
129 elif whence == 2:
129 elif whence == 2:
130 self.offset = self.end() + offset
130 self.offset = self.end() + offset
131 if self.offset < self.size:
131 if self.offset < self.size:
132 self.fp.seek(self.offset)
132 self.fp.seek(self.offset)
133
133
134 def read(self, count=-1):
134 def read(self, count=-1):
135 '''only trick here is reads that span real file and data'''
135 '''only trick here is reads that span real file and data'''
136 ret = b""
136 ret = b""
137 if self.offset < self.size:
137 if self.offset < self.size:
138 s = self.fp.read(count)
138 s = self.fp.read(count)
139 ret = s
139 ret = s
140 self.offset += len(s)
140 self.offset += len(s)
141 if count > 0:
141 if count > 0:
142 count -= len(s)
142 count -= len(s)
143 if count != 0:
143 if count != 0:
144 doff = self.offset - self.size
144 doff = self.offset - self.size
145 self.data.insert(0, b"".join(self.data))
145 self.data.insert(0, b"".join(self.data))
146 del self.data[1:]
146 del self.data[1:]
147 s = self.data[0][doff : doff + count]
147 s = self.data[0][doff : doff + count]
148 self.offset += len(s)
148 self.offset += len(s)
149 ret += s
149 ret += s
150 return ret
150 return ret
151
151
152 def write(self, s):
152 def write(self, s):
153 self.data.append(bytes(s))
153 self.data.append(bytes(s))
154 self.offset += len(s)
154 self.offset += len(s)
155 self._end += len(s)
155 self._end += len(s)
156
156
157 def __enter__(self):
157 def __enter__(self):
158 self.fp.__enter__()
158 self.fp.__enter__()
159 return self
159 return self
160
160
161 def __exit__(self, *args):
161 def __exit__(self, *args):
162 return self.fp.__exit__(*args)
162 return self.fp.__exit__(*args)
163
163
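To make the virtual-offset behaviour concrete, a hedged, self-contained sketch (the fake vfs and filename are made up; only the two vfs entry points appender touches are implemented):

    import io

    class fakestat(object):
        def __init__(self, size):
            self.st_size = size

    class fakevfs(object):
        def __init__(self, payload):
            self.payload = payload
        def __call__(self, name, mode):
            # every open yields the current on-disk bytes
            return io.BytesIO(self.payload)
        def fstat(self, fp):
            return fakestat(len(self.payload))

    buf = []
    fp = appender(fakevfs(b'0123456789'), b'00changelog.i', b'r', buf)
    fp.write(b'ABC')               # buffered in buf, never written to "disk"
    fp.seek(8)
    assert fp.read(5) == b'89ABC'  # read spans on-disk and buffered bytes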
164
164
165 class _divertopener(object):
165 class _divertopener(object):
166 def __init__(self, opener, target):
166 def __init__(self, opener, target):
167 self._opener = opener
167 self._opener = opener
168 self._target = target
168 self._target = target
169
169
170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
171 if name != self._target:
171 if name != self._target:
172 return self._opener(name, mode, **kwargs)
172 return self._opener(name, mode, **kwargs)
173 return self._opener(name + b".a", mode, **kwargs)
173 return self._opener(name + b".a", mode, **kwargs)
174
174
175 def __getattr__(self, attr):
175 def __getattr__(self, attr):
176 return getattr(self._opener, attr)
176 return getattr(self._opener, attr)
177
177
178
178
179 def _delayopener(opener, target, buf):
179 def _delayopener(opener, target, buf):
180 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 """build an opener that stores chunks in 'buf' instead of 'target'"""
181
181
182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
183 if name != target:
183 if name != target:
184 return opener(name, mode, **kwargs)
184 return opener(name, mode, **kwargs)
185 assert not kwargs
185 assert not kwargs
186 return appender(opener, name, mode, buf)
186 return appender(opener, name, mode, buf)
187
187
188 return _delay
188 return _delay
189
189
190
190
191 @attr.s
191 @attr.s
192 class _changelogrevision(object):
192 class _changelogrevision(object):
193 # Extensions might modify _defaultextra, so let the constructor below pass
193 # Extensions might modify _defaultextra, so let the constructor below pass
194 # it in
194 # it in
195 extra = attr.ib()
195 extra = attr.ib()
196 manifest = attr.ib()
196 manifest = attr.ib()
197 user = attr.ib(default=b'')
197 user = attr.ib(default=b'')
198 date = attr.ib(default=(0, 0))
198 date = attr.ib(default=(0, 0))
199 files = attr.ib(default=attr.Factory(list))
199 files = attr.ib(default=attr.Factory(list))
200 filesadded = attr.ib(default=None)
200 filesadded = attr.ib(default=None)
201 filesremoved = attr.ib(default=None)
201 filesremoved = attr.ib(default=None)
202 p1copies = attr.ib(default=None)
202 p1copies = attr.ib(default=None)
203 p2copies = attr.ib(default=None)
203 p2copies = attr.ib(default=None)
204 description = attr.ib(default=b'')
204 description = attr.ib(default=b'')
205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
206
206
207
207
208 class changelogrevision(object):
208 class changelogrevision(object):
209 """Holds results of a parsed changelog revision.
209 """Holds results of a parsed changelog revision.
210
210
211 Changelog revisions consist of multiple pieces of data, including
211 Changelog revisions consist of multiple pieces of data, including
212 the manifest node, user, and date. This object exposes a view into
212 the manifest node, user, and date. This object exposes a view into
213 the parsed object.
213 the parsed object.
214 """
214 """
215
215
216 __slots__ = (
216 __slots__ = (
217 '_offsets',
217 '_offsets',
218 '_text',
218 '_text',
219 '_sidedata',
219 '_sidedata',
220 '_cpsd',
220 '_cpsd',
221 '_changes',
221 '_changes',
222 )
222 )
223
223
224 def __new__(cls, cl, text, sidedata, cpsd):
224 def __new__(cls, cl, text, sidedata, cpsd):
225 if not text:
225 if not text:
226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
227
227
228 self = super(changelogrevision, cls).__new__(cls)
228 self = super(changelogrevision, cls).__new__(cls)
229 # We could return here and implement the following as an __init__.
229 # We could return here and implement the following as an __init__.
230 # But doing it here is equivalent and saves an extra function call.
230 # But doing it here is equivalent and saves an extra function call.
231
231
232 # format used:
232 # format used:
233 # nodeid\n : manifest node in ascii
233 # nodeid\n : manifest node in ascii
234 # user\n : user, no \n or \r allowed
234 # user\n : user, no \n or \r allowed
235 # time tz extra\n : date (time is int or float, timezone is int)
235 # time tz extra\n : date (time is int or float, timezone is int)
236 # : extra is metadata, encoded and separated by '\0'
236 # : extra is metadata, encoded and separated by '\0'
237 # : older versions ignore it
237 # : older versions ignore it
238 # files\n\n : files modified by the cset, no \n or \r allowed
238 # files\n\n : files modified by the cset, no \n or \r allowed
239 # (.*) : comment (free text, ideally utf-8)
239 # (.*) : comment (free text, ideally utf-8)
240 #
240 #
241 # changelog v0 doesn't use extra
241 # changelog v0 doesn't use extra
242
242
243 nl1 = text.index(b'\n')
243 nl1 = text.index(b'\n')
244 nl2 = text.index(b'\n', nl1 + 1)
244 nl2 = text.index(b'\n', nl1 + 1)
245 nl3 = text.index(b'\n', nl2 + 1)
245 nl3 = text.index(b'\n', nl2 + 1)
246
246
247 # The list of files may be empty; in that case nl3 is the first newline
247 # The list of files may be empty; in that case nl3 is the first newline
248 # of the double newline that precedes the description.
248 # of the double newline that precedes the description.
249 if text[nl3 + 1 : nl3 + 2] == b'\n':
249 if text[nl3 + 1 : nl3 + 2] == b'\n':
250 doublenl = nl3
250 doublenl = nl3
251 else:
251 else:
252 doublenl = text.index(b'\n\n', nl3 + 1)
252 doublenl = text.index(b'\n\n', nl3 + 1)
253
253
254 self._offsets = (nl1, nl2, nl3, doublenl)
254 self._offsets = (nl1, nl2, nl3, doublenl)
255 self._text = text
255 self._text = text
256 self._sidedata = sidedata
256 self._sidedata = sidedata
257 self._cpsd = cpsd
257 self._cpsd = cpsd
258 self._changes = None
258 self._changes = None
259
259
260 return self
260 return self
261
261
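To make the parsed layout concrete, a hedged illustrative raw entry (all values made up):

    0123456789abcdef0123456789abcdef01234567
    Jane Doe <jane@example.com>
    1700000000 0 branch:stable
    dir/a.txt
    dir/b.txt

    Fix a bug in the frobnicator.

nl1, nl2 and nl3 mark the ends of the manifest, user and date/extra lines; doublenl marks the blank line separating the (possibly empty) file list from the free-form description.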
262 @property
262 @property
263 def manifest(self):
263 def manifest(self):
264 return bin(self._text[0 : self._offsets[0]])
264 return bin(self._text[0 : self._offsets[0]])
265
265
266 @property
266 @property
267 def user(self):
267 def user(self):
268 off = self._offsets
268 off = self._offsets
269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
270
270
271 @property
271 @property
272 def _rawdate(self):
272 def _rawdate(self):
273 off = self._offsets
273 off = self._offsets
274 dateextra = self._text[off[1] + 1 : off[2]]
274 dateextra = self._text[off[1] + 1 : off[2]]
275 return dateextra.split(b' ', 2)[0:2]
275 return dateextra.split(b' ', 2)[0:2]
276
276
277 @property
277 @property
278 def _rawextra(self):
278 def _rawextra(self):
279 off = self._offsets
279 off = self._offsets
280 dateextra = self._text[off[1] + 1 : off[2]]
280 dateextra = self._text[off[1] + 1 : off[2]]
281 fields = dateextra.split(b' ', 2)
281 fields = dateextra.split(b' ', 2)
282 if len(fields) != 3:
282 if len(fields) != 3:
283 return None
283 return None
284
284
285 return fields[2]
285 return fields[2]
286
286
287 @property
287 @property
288 def date(self):
288 def date(self):
289 raw = self._rawdate
289 raw = self._rawdate
290 time = float(raw[0])
290 time = float(raw[0])
291 # Various tools did silly things with the timezone.
291 # Various tools did silly things with the timezone.
292 try:
292 try:
293 timezone = int(raw[1])
293 timezone = int(raw[1])
294 except ValueError:
294 except ValueError:
295 timezone = 0
295 timezone = 0
296
296
297 return time, timezone
297 return time, timezone
298
298
299 @property
299 @property
300 def extra(self):
300 def extra(self):
301 raw = self._rawextra
301 raw = self._rawextra
302 if raw is None:
302 if raw is None:
303 return _defaultextra
303 return _defaultextra
304
304
305 return decodeextra(raw)
305 return decodeextra(raw)
306
306
307 @property
307 @property
308 def changes(self):
308 def changes(self):
309 if self._changes is not None:
309 if self._changes is not None:
310 return self._changes
310 return self._changes
311 if self._cpsd:
311 if self._cpsd:
312 changes = metadata.decode_files_sidedata(self._sidedata)
312 changes = metadata.decode_files_sidedata(self._sidedata)
313 else:
313 else:
314 changes = metadata.ChangingFiles(
314 changes = metadata.ChangingFiles(
315 touched=self.files or (),
315 touched=self.files or (),
316 added=self.filesadded or (),
316 added=self.filesadded or (),
317 removed=self.filesremoved or (),
317 removed=self.filesremoved or (),
318 p1_copies=self.p1copies or {},
318 p1_copies=self.p1copies or {},
319 p2_copies=self.p2copies or {},
319 p2_copies=self.p2copies or {},
320 )
320 )
321 self._changes = changes
321 self._changes = changes
322 return changes
322 return changes
323
323
324 @property
324 @property
325 def files(self):
325 def files(self):
326 if self._cpsd:
326 if self._cpsd:
327 return sorted(self.changes.touched)
327 return sorted(self.changes.touched)
328 off = self._offsets
328 off = self._offsets
329 if off[2] == off[3]:
329 if off[2] == off[3]:
330 return []
330 return []
331
331
332 return self._text[off[2] + 1 : off[3]].split(b'\n')
332 return self._text[off[2] + 1 : off[3]].split(b'\n')
333
333
334 @property
334 @property
335 def filesadded(self):
335 def filesadded(self):
336 if self._cpsd:
336 if self._cpsd:
337 return self.changes.added
337 return self.changes.added
338 else:
338 else:
339 rawindices = self.extra.get(b'filesadded')
339 rawindices = self.extra.get(b'filesadded')
340 if rawindices is None:
340 if rawindices is None:
341 return None
341 return None
342 return metadata.decodefileindices(self.files, rawindices)
342 return metadata.decodefileindices(self.files, rawindices)
343
343
344 @property
344 @property
345 def filesremoved(self):
345 def filesremoved(self):
346 if self._cpsd:
346 if self._cpsd:
347 return self.changes.removed
347 return self.changes.removed
348 else:
348 else:
349 rawindices = self.extra.get(b'filesremoved')
349 rawindices = self.extra.get(b'filesremoved')
350 if rawindices is None:
350 if rawindices is None:
351 return None
351 return None
352 return metadata.decodefileindices(self.files, rawindices)
352 return metadata.decodefileindices(self.files, rawindices)
353
353
354 @property
354 @property
355 def p1copies(self):
355 def p1copies(self):
356 if self._cpsd:
356 if self._cpsd:
357 return self.changes.copied_from_p1
357 return self.changes.copied_from_p1
358 else:
358 else:
359 rawcopies = self.extra.get(b'p1copies')
359 rawcopies = self.extra.get(b'p1copies')
360 if rawcopies is None:
360 if rawcopies is None:
361 return None
361 return None
362 return metadata.decodecopies(self.files, rawcopies)
362 return metadata.decodecopies(self.files, rawcopies)
363
363
364 @property
364 @property
365 def p2copies(self):
365 def p2copies(self):
366 if self._cpsd:
366 if self._cpsd:
367 return self.changes.copied_from_p2
367 return self.changes.copied_from_p2
368 else:
368 else:
369 rawcopies = self.extra.get(b'p2copies')
369 rawcopies = self.extra.get(b'p2copies')
370 if rawcopies is None:
370 if rawcopies is None:
371 return None
371 return None
372 return metadata.decodecopies(self.files, rawcopies)
372 return metadata.decodecopies(self.files, rawcopies)
373
373
374 @property
374 @property
375 def description(self):
375 def description(self):
376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
377
377
378 @property
378 @property
379 def branchinfo(self):
379 def branchinfo(self):
380 extra = self.extra
380 extra = self.extra
381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
382
382
383
383
384 class changelog(revlog.revlog):
384 class changelog(revlog.revlog):
385 def __init__(self, opener, trypending=False, concurrencychecker=None):
385 def __init__(self, opener, trypending=False, concurrencychecker=None):
386 """Load a changelog revlog using an opener.
386 """Load a changelog revlog using an opener.
387
387
388 If ``trypending`` is true, we attempt to load the index from a
388 If ``trypending`` is true, we attempt to load the index from a
389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
390 The ``00changelog.i.a`` file contains index (and possibly inline
390 The ``00changelog.i.a`` file contains index (and possibly inline
391 revision) data for a transaction that hasn't been finalized yet.
391 revision) data for a transaction that hasn't been finalized yet.
392 It exists in a separate file to facilitate readers (such as
392 It exists in a separate file to facilitate readers (such as
393 hook processes) accessing data before a transaction is finalized.
393 hook processes) accessing data before a transaction is finalized.
394
394
395 ``concurrencychecker`` will be passed to the revlog init function, see
395 ``concurrencychecker`` will be passed to the revlog init function, see
396 the documentation there.
396 the documentation there.
397 """
397 """
398 revlog.revlog.__init__(
398 revlog.revlog.__init__(
399 self,
399 self,
400 opener,
400 opener,
401 target=(revlog_constants.KIND_CHANGELOG, None),
401 target=(revlog_constants.KIND_CHANGELOG, None),
402 radix=b'00changelog',
402 radix=b'00changelog',
403 checkambig=True,
403 checkambig=True,
404 mmaplargeindex=True,
404 mmaplargeindex=True,
405 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
405 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
406 concurrencychecker=concurrencychecker,
406 concurrencychecker=concurrencychecker,
407 trypending=trypending,
407 trypending=trypending,
408 )
408 )
409
409
410 if self._initempty and (self._format_version == revlog.REVLOGV1):
410 if self._initempty and (self._format_version == revlog.REVLOGV1):
411 # changelogs don't benefit from generaldelta.
411 # changelogs don't benefit from generaldelta.
412
412
413 self._format_flags &= ~revlog.FLAG_GENERALDELTA
413 self._format_flags &= ~revlog.FLAG_GENERALDELTA
414 self._generaldelta = False
414 self._generaldelta = False
415
415
416 # Delta chains for changelogs tend to be very small because entries
416 # Delta chains for changelogs tend to be very small because entries
417 # tend to be small and don't delta well with each other. So disable delta
417 # tend to be small and don't delta well with each other. So disable delta
418 # chains.
418 # chains.
419 self._storedeltachains = False
419 self._storedeltachains = False
420
420
421 self._realopener = opener
421 self._realopener = opener
422 self._delayed = False
422 self._delayed = False
423 self._delaybuf = None
423 self._delaybuf = None
424 self._divert = False
424 self._divert = False
425 self._filteredrevs = frozenset()
425 self._filteredrevs = frozenset()
426 self._filteredrevs_hashcache = {}
426 self._filteredrevs_hashcache = {}
427 self._copiesstorage = opener.options.get(b'copies-storage')
427 self._copiesstorage = opener.options.get(b'copies-storage')
428
428
429 @property
429 @property
430 def filteredrevs(self):
430 def filteredrevs(self):
431 return self._filteredrevs
431 return self._filteredrevs
432
432
433 @filteredrevs.setter
433 @filteredrevs.setter
434 def filteredrevs(self, val):
434 def filteredrevs(self, val):
435 # Ensure all updates go through this function
435 # Ensure all updates go through this function
436 assert isinstance(val, frozenset)
436 assert isinstance(val, frozenset)
437 self._filteredrevs = val
437 self._filteredrevs = val
438 self._filteredrevs_hashcache = {}
438 self._filteredrevs_hashcache = {}
439
439
440 def _write_docket(self, tr):
440 def _write_docket(self, tr):
441 if not self._delayed:
441 if not self._delayed:
442 super(changelog, self)._write_docket(tr)
442 super(changelog, self)._write_docket(tr)
443
443
444 def delayupdate(self, tr):
444 def delayupdate(self, tr):
445 """delay visibility of index updates to other readers"""
445 """delay visibility of index updates to other readers"""
446 if self._docket is None and not self._delayed:
446 if self._docket is None and not self._delayed:
447 if len(self) == 0:
447 if len(self) == 0:
448 self._divert = True
448 self._divert = True
449 if self._realopener.exists(self._indexfile + b'.a'):
449 if self._realopener.exists(self._indexfile + b'.a'):
450 self._realopener.unlink(self._indexfile + b'.a')
450 self._realopener.unlink(self._indexfile + b'.a')
451 self.opener = _divertopener(self._realopener, self._indexfile)
451 self.opener = _divertopener(self._realopener, self._indexfile)
452 else:
452 else:
453 self._delaybuf = []
453 self._delaybuf = []
454 self.opener = _delayopener(
454 self.opener = _delayopener(
455 self._realopener, self._indexfile, self._delaybuf
455 self._realopener, self._indexfile, self._delaybuf
456 )
456 )
457 self._segmentfile.opener = self.opener
457 self._delayed = True
458 self._delayed = True
458 tr.addpending(b'cl-%i' % id(self), self._writepending)
459 tr.addpending(b'cl-%i' % id(self), self._writepending)
459 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
460 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
460
461
461 def _finalize(self, tr):
462 def _finalize(self, tr):
462 """finalize index updates"""
463 """finalize index updates"""
463 self._delayed = False
464 self._delayed = False
464 self.opener = self._realopener
465 self.opener = self._realopener
466 self._segmentfile.opener = self.opener
465 # move redirected index data back into place
467 # move redirected index data back into place
466 if self._docket is not None:
468 if self._docket is not None:
467 self._write_docket(tr)
469 self._write_docket(tr)
468 elif self._divert:
470 elif self._divert:
469 assert not self._delaybuf
471 assert not self._delaybuf
470 tmpname = self._indexfile + b".a"
472 tmpname = self._indexfile + b".a"
471 nfile = self.opener.open(tmpname)
473 nfile = self.opener.open(tmpname)
472 nfile.close()
474 nfile.close()
473 self.opener.rename(tmpname, self._indexfile, checkambig=True)
475 self.opener.rename(tmpname, self._indexfile, checkambig=True)
474 elif self._delaybuf:
476 elif self._delaybuf:
475 fp = self.opener(self._indexfile, b'a', checkambig=True)
477 fp = self.opener(self._indexfile, b'a', checkambig=True)
476 fp.write(b"".join(self._delaybuf))
478 fp.write(b"".join(self._delaybuf))
477 fp.close()
479 fp.close()
478 self._delaybuf = None
480 self._delaybuf = None
479 self._divert = False
481 self._divert = False
480 # split when we're done
482 # split when we're done
481 self._enforceinlinesize(tr)
483 self._enforceinlinesize(tr)
482
484
483 def _writepending(self, tr):
485 def _writepending(self, tr):
484 """create a file containing the unfinalized state for
486 """create a file containing the unfinalized state for
485 pretxnchangegroup"""
487 pretxnchangegroup"""
486 if self._docket:
488 if self._docket:
487 return self._docket.write(tr, pending=True)
489 return self._docket.write(tr, pending=True)
488 if self._delaybuf:
490 if self._delaybuf:
489 # make a temporary copy of the index
491 # make a temporary copy of the index
490 fp1 = self._realopener(self._indexfile)
492 fp1 = self._realopener(self._indexfile)
491 pendingfilename = self._indexfile + b".a"
493 pendingfilename = self._indexfile + b".a"
492 # register as a temp file to ensure cleanup on failure
494 # register as a temp file to ensure cleanup on failure
493 tr.registertmp(pendingfilename)
495 tr.registertmp(pendingfilename)
494 # write existing data
496 # write existing data
495 fp2 = self._realopener(pendingfilename, b"w")
497 fp2 = self._realopener(pendingfilename, b"w")
496 fp2.write(fp1.read())
498 fp2.write(fp1.read())
497 # add pending data
499 # add pending data
498 fp2.write(b"".join(self._delaybuf))
500 fp2.write(b"".join(self._delaybuf))
499 fp2.close()
501 fp2.close()
500 # switch modes so finalize can simply rename
502 # switch modes so finalize can simply rename
501 self._delaybuf = None
503 self._delaybuf = None
502 self._divert = True
504 self._divert = True
503 self.opener = _divertopener(self._realopener, self._indexfile)
505 self.opener = _divertopener(self._realopener, self._indexfile)
506 self._segmentfile.opener = self.opener
504
507
505 if self._divert:
508 if self._divert:
506 return True
509 return True
507
510
508 return False
511 return False
509
512
510 def _enforceinlinesize(self, tr):
513 def _enforceinlinesize(self, tr):
511 if not self._delayed:
514 if not self._delayed:
512 revlog.revlog._enforceinlinesize(self, tr)
515 revlog.revlog._enforceinlinesize(self, tr)
513
516
514 def read(self, nodeorrev):
517 def read(self, nodeorrev):
515 """Obtain data from a parsed changelog revision.
518 """Obtain data from a parsed changelog revision.
516
519
517 Returns a 6-tuple of:
520 Returns a 6-tuple of:
518
521
519 - manifest node in binary
522 - manifest node in binary
520 - author/user as a localstr
523 - author/user as a localstr
521 - date as a 2-tuple of (time, timezone)
524 - date as a 2-tuple of (time, timezone)
522 - list of files
525 - list of files
523 - commit message as a localstr
526 - commit message as a localstr
524 - dict of extra metadata
527 - dict of extra metadata
525
528
526 Unless you need to access all fields, consider calling
529 Unless you need to access all fields, consider calling
527 ``changelogrevision`` instead, as it is faster for partial object
530 ``changelogrevision`` instead, as it is faster for partial object
528 access.
531 access.
529 """
532 """
530 d = self._revisiondata(nodeorrev)
533 d = self._revisiondata(nodeorrev)
531 sidedata = self.sidedata(nodeorrev)
534 sidedata = self.sidedata(nodeorrev)
532 copy_sd = self._copiesstorage == b'changeset-sidedata'
535 copy_sd = self._copiesstorage == b'changeset-sidedata'
533 c = changelogrevision(self, d, sidedata, copy_sd)
536 c = changelogrevision(self, d, sidedata, copy_sd)
534 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
537 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
535
538
536 def changelogrevision(self, nodeorrev):
539 def changelogrevision(self, nodeorrev):
537 """Obtain a ``changelogrevision`` for a node or revision."""
540 """Obtain a ``changelogrevision`` for a node or revision."""
538 text = self._revisiondata(nodeorrev)
541 text = self._revisiondata(nodeorrev)
539 sidedata = self.sidedata(nodeorrev)
542 sidedata = self.sidedata(nodeorrev)
540 return changelogrevision(
543 return changelogrevision(
541 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
544 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
542 )
545 )
543
546
544 def readfiles(self, nodeorrev):
547 def readfiles(self, nodeorrev):
545 """
548 """
546 short version of read that only returns the files modified by the cset
549 short version of read that only returns the files modified by the cset
547 """
550 """
548 text = self.revision(nodeorrev)
551 text = self.revision(nodeorrev)
549 if not text:
552 if not text:
550 return []
553 return []
551 last = text.index(b"\n\n")
554 last = text.index(b"\n\n")
552 l = text[:last].split(b'\n')
555 l = text[:last].split(b'\n')
553 return l[3:]
556 return l[3:]
554
557
555 def add(
558 def add(
556 self,
559 self,
557 manifest,
560 manifest,
558 files,
561 files,
559 desc,
562 desc,
560 transaction,
563 transaction,
561 p1,
564 p1,
562 p2,
565 p2,
563 user,
566 user,
564 date=None,
567 date=None,
565 extra=None,
568 extra=None,
566 ):
569 ):
567 # Convert to UTF-8 encoded bytestrings as the very first
570 # Convert to UTF-8 encoded bytestrings as the very first
568 # thing: calling any method on a localstr object will turn it
571 # thing: calling any method on a localstr object will turn it
569 # into a str object and the cached UTF-8 string is thus lost.
572 # into a str object and the cached UTF-8 string is thus lost.
570 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
573 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
571
574
572 user = user.strip()
575 user = user.strip()
573 # An empty username or a username with a "\n" will make the
576 # An empty username or a username with a "\n" will make the
574 # revision text contain two "\n\n" sequences -> corrupt
577 # revision text contain two "\n\n" sequences -> corrupt
575 # repository since read cannot unpack the revision.
578 # repository since read cannot unpack the revision.
576 if not user:
579 if not user:
577 raise error.StorageError(_(b"empty username"))
580 raise error.StorageError(_(b"empty username"))
578 if b"\n" in user:
581 if b"\n" in user:
579 raise error.StorageError(
582 raise error.StorageError(
580 _(b"username %r contains a newline") % pycompat.bytestr(user)
583 _(b"username %r contains a newline") % pycompat.bytestr(user)
581 )
584 )
582
585
583 desc = stripdesc(desc)
586 desc = stripdesc(desc)
584
587
585 if date:
588 if date:
586 parseddate = b"%d %d" % dateutil.parsedate(date)
589 parseddate = b"%d %d" % dateutil.parsedate(date)
587 else:
590 else:
588 parseddate = b"%d %d" % dateutil.makedate()
591 parseddate = b"%d %d" % dateutil.makedate()
589 if extra:
592 if extra:
590 branch = extra.get(b"branch")
593 branch = extra.get(b"branch")
591 if branch in (b"default", b""):
594 if branch in (b"default", b""):
592 del extra[b"branch"]
595 del extra[b"branch"]
593 elif branch in (b".", b"null", b"tip"):
596 elif branch in (b".", b"null", b"tip"):
594 raise error.StorageError(
597 raise error.StorageError(
595 _(b'the name \'%s\' is reserved') % branch
598 _(b'the name \'%s\' is reserved') % branch
596 )
599 )
597 sortedfiles = sorted(files.touched)
600 sortedfiles = sorted(files.touched)
598 flags = 0
601 flags = 0
599 sidedata = None
602 sidedata = None
600 if self._copiesstorage == b'changeset-sidedata':
603 if self._copiesstorage == b'changeset-sidedata':
601 if files.has_copies_info:
604 if files.has_copies_info:
602 flags |= flagutil.REVIDX_HASCOPIESINFO
605 flags |= flagutil.REVIDX_HASCOPIESINFO
603 sidedata = metadata.encode_files_sidedata(files)
606 sidedata = metadata.encode_files_sidedata(files)
604
607
605 if extra:
608 if extra:
606 extra = encodeextra(extra)
609 extra = encodeextra(extra)
607 parseddate = b"%s %s" % (parseddate, extra)
610 parseddate = b"%s %s" % (parseddate, extra)
608 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
611 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
609 text = b"\n".join(l)
612 text = b"\n".join(l)
610 rev = self.addrevision(
613 rev = self.addrevision(
611 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
614 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
612 )
615 )
613 return self.node(rev)
616 return self.node(rev)
614
617
615 def branchinfo(self, rev):
618 def branchinfo(self, rev):
616 """return the branch name and open/close state of a revision
619 """return the branch name and open/close state of a revision
617
620
618 This function exists because creating a changectx object
621 This function exists because creating a changectx object
619 just to access this is costly."""
622 just to access this is costly."""
620 return self.changelogrevision(rev).branchinfo
623 return self.changelogrevision(rev).branchinfo
621
624
622 def _nodeduplicatecallback(self, transaction, rev):
625 def _nodeduplicatecallback(self, transaction, rev):
623 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
626 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
624 #
627 #
625 # We track them in a list to preserve their order from the source bundle
628 # We track them in a list to preserve their order from the source bundle
626 duplicates = transaction.changes.setdefault(b'revduplicates', [])
629 duplicates = transaction.changes.setdefault(b'revduplicates', [])
627 duplicates.append(rev)
630 duplicates.append(rev)
@@ -1,3394 +1,3298 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15 from __future__ import absolute_import
15 from __future__ import absolute_import
16
16
17 import binascii
17 import binascii
18 import collections
18 import collections
19 import contextlib
19 import contextlib
20 import errno
20 import errno
21 import io
21 import io
22 import os
22 import os
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullrev,
30 nullrev,
31 sha1nodeconstants,
31 sha1nodeconstants,
32 short,
32 short,
33 wdirrev,
33 wdirrev,
34 )
34 )
35 from .i18n import _
35 from .i18n import _
36 from .pycompat import getattr
36 from .pycompat import getattr
37 from .revlogutils.constants import (
37 from .revlogutils.constants import (
38 ALL_KINDS,
38 ALL_KINDS,
39 CHANGELOGV2,
39 CHANGELOGV2,
40 COMP_MODE_DEFAULT,
40 COMP_MODE_DEFAULT,
41 COMP_MODE_INLINE,
41 COMP_MODE_INLINE,
42 COMP_MODE_PLAIN,
42 COMP_MODE_PLAIN,
43 FEATURES_BY_VERSION,
43 FEATURES_BY_VERSION,
44 FLAG_GENERALDELTA,
44 FLAG_GENERALDELTA,
45 FLAG_INLINE_DATA,
45 FLAG_INLINE_DATA,
46 INDEX_HEADER,
46 INDEX_HEADER,
47 KIND_CHANGELOG,
47 KIND_CHANGELOG,
48 REVLOGV0,
48 REVLOGV0,
49 REVLOGV1,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
55 REVLOG_DEFAULT_VERSION,
56 SUPPORTED_FLAGS,
56 SUPPORTED_FLAGS,
57 )
57 )
58 from .revlogutils.flagutil import (
58 from .revlogutils.flagutil import (
59 REVIDX_DEFAULT_FLAGS,
59 REVIDX_DEFAULT_FLAGS,
60 REVIDX_ELLIPSIS,
60 REVIDX_ELLIPSIS,
61 REVIDX_EXTSTORED,
61 REVIDX_EXTSTORED,
62 REVIDX_FLAGS_ORDER,
62 REVIDX_FLAGS_ORDER,
63 REVIDX_HASCOPIESINFO,
63 REVIDX_HASCOPIESINFO,
64 REVIDX_ISCENSORED,
64 REVIDX_ISCENSORED,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 )
66 )
67 from .thirdparty import attr
67 from .thirdparty import attr
68 from . import (
68 from . import (
69 ancestor,
69 ancestor,
70 dagop,
70 dagop,
71 error,
71 error,
72 mdiff,
72 mdiff,
73 policy,
73 policy,
74 pycompat,
74 pycompat,
75 revlogutils,
75 revlogutils,
76 templatefilters,
76 templatefilters,
77 util,
77 util,
78 )
78 )
79 from .interfaces import (
79 from .interfaces import (
80 repository,
80 repository,
81 util as interfaceutil,
81 util as interfaceutil,
82 )
82 )
83 from .revlogutils import (
83 from .revlogutils import (
84 censor,
84 censor,
85 deltas as deltautil,
85 deltas as deltautil,
86 docket as docketutil,
86 docket as docketutil,
87 flagutil,
87 flagutil,
88 nodemap as nodemaputil,
88 nodemap as nodemaputil,
89 randomaccessfile,
89 revlogv0,
90 revlogv0,
90 sidedata as sidedatautil,
91 sidedata as sidedatautil,
91 )
92 )
92 from .utils import (
93 from .utils import (
93 storageutil,
94 storageutil,
94 stringutil,
95 stringutil,
95 )
96 )
96
97
97 # blanked usage of all the names to prevent pyflakes constraints
98 # blanked usage of all the names to prevent pyflakes constraints
98 # We need these names available in the module for extensions.
99 # We need these names available in the module for extensions.
99
100
100 REVLOGV0
101 REVLOGV0
101 REVLOGV1
102 REVLOGV1
102 REVLOGV2
103 REVLOGV2
103 FLAG_INLINE_DATA
104 FLAG_INLINE_DATA
104 FLAG_GENERALDELTA
105 FLAG_GENERALDELTA
105 REVLOG_DEFAULT_FLAGS
106 REVLOG_DEFAULT_FLAGS
106 REVLOG_DEFAULT_FORMAT
107 REVLOG_DEFAULT_FORMAT
107 REVLOG_DEFAULT_VERSION
108 REVLOG_DEFAULT_VERSION
108 REVLOGV1_FLAGS
109 REVLOGV1_FLAGS
109 REVLOGV2_FLAGS
110 REVLOGV2_FLAGS
110 REVIDX_ISCENSORED
111 REVIDX_ISCENSORED
111 REVIDX_ELLIPSIS
112 REVIDX_ELLIPSIS
112 REVIDX_HASCOPIESINFO
113 REVIDX_HASCOPIESINFO
113 REVIDX_EXTSTORED
114 REVIDX_EXTSTORED
114 REVIDX_DEFAULT_FLAGS
115 REVIDX_DEFAULT_FLAGS
115 REVIDX_FLAGS_ORDER
116 REVIDX_FLAGS_ORDER
116 REVIDX_RAWTEXT_CHANGING_FLAGS
117 REVIDX_RAWTEXT_CHANGING_FLAGS
117
118
118 parsers = policy.importmod('parsers')
119 parsers = policy.importmod('parsers')
119 rustancestor = policy.importrust('ancestor')
120 rustancestor = policy.importrust('ancestor')
120 rustdagop = policy.importrust('dagop')
121 rustdagop = policy.importrust('dagop')
121 rustrevlog = policy.importrust('revlog')
122 rustrevlog = policy.importrust('revlog')
122
123
123 # Aliased for performance.
124 # Aliased for performance.
124 _zlibdecompress = zlib.decompress
125 _zlibdecompress = zlib.decompress
125
126
126 # max size of revlog with inline data
127 # max size of revlog with inline data
127 _maxinline = 131072
128 _maxinline = 131072
128 _chunksize = 1048576
129
129
130 # Flag processors for REVIDX_ELLIPSIS.
130 # Flag processors for REVIDX_ELLIPSIS.
131 def ellipsisreadprocessor(rl, text):
131 def ellipsisreadprocessor(rl, text):
132 return text, False
132 return text, False
133
133
134
134
135 def ellipsiswriteprocessor(rl, text):
135 def ellipsiswriteprocessor(rl, text):
136 return text, False
136 return text, False
137
137
138
138
139 def ellipsisrawprocessor(rl, text):
139 def ellipsisrawprocessor(rl, text):
140 return False
140 return False
141
141
142
142
143 ellipsisprocessor = (
143 ellipsisprocessor = (
144 ellipsisreadprocessor,
144 ellipsisreadprocessor,
145 ellipsiswriteprocessor,
145 ellipsiswriteprocessor,
146 ellipsisrawprocessor,
146 ellipsisrawprocessor,
147 )
147 )
148
148
149
149
150 def _verify_revision(rl, skipflags, state, node):
150 def _verify_revision(rl, skipflags, state, node):
151 """Verify the integrity of the given revlog ``node`` while providing a hook
151 """Verify the integrity of the given revlog ``node`` while providing a hook
152 point for extensions to influence the operation."""
152 point for extensions to influence the operation."""
153 if skipflags:
153 if skipflags:
154 state[b'skipread'].add(node)
154 state[b'skipread'].add(node)
155 else:
155 else:
156 # Side-effect: read content and verify hash.
156 # Side-effect: read content and verify hash.
157 rl.revision(node)
157 rl.revision(node)
158
158
159
159
160 # True if a fast implementation for persistent-nodemap is available
160 # True if a fast implementation for persistent-nodemap is available
161 #
161 #
162 # We also consider we have a "fast" implementation in "pure" python because
162 # We also consider we have a "fast" implementation in "pure" python because
163 # people using pure don't really have performance considerations (and a
164 # people using pure don't really have performance considerations (and a
164 # wheelbarrow of other slowness sources)
165 # wheelbarrow of other slowness sources)
165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 parsers, 'BaseIndexObject'
166 parsers, 'BaseIndexObject'
167 )
167 )
168
168
169
169
170 @interfaceutil.implementer(repository.irevisiondelta)
170 @interfaceutil.implementer(repository.irevisiondelta)
171 @attr.s(slots=True)
171 @attr.s(slots=True)
172 class revlogrevisiondelta(object):
172 class revlogrevisiondelta(object):
173 node = attr.ib()
173 node = attr.ib()
174 p1node = attr.ib()
174 p1node = attr.ib()
175 p2node = attr.ib()
175 p2node = attr.ib()
176 basenode = attr.ib()
176 basenode = attr.ib()
177 flags = attr.ib()
177 flags = attr.ib()
178 baserevisionsize = attr.ib()
178 baserevisionsize = attr.ib()
179 revision = attr.ib()
179 revision = attr.ib()
180 delta = attr.ib()
180 delta = attr.ib()
181 sidedata = attr.ib()
181 sidedata = attr.ib()
182 protocol_flags = attr.ib()
182 protocol_flags = attr.ib()
183 linknode = attr.ib(default=None)
183 linknode = attr.ib(default=None)
184
184
185
185
186 @interfaceutil.implementer(repository.iverifyproblem)
186 @interfaceutil.implementer(repository.iverifyproblem)
187 @attr.s(frozen=True)
187 @attr.s(frozen=True)
188 class revlogproblem(object):
188 class revlogproblem(object):
189 warning = attr.ib(default=None)
189 warning = attr.ib(default=None)
190 error = attr.ib(default=None)
190 error = attr.ib(default=None)
191 node = attr.ib(default=None)
191 node = attr.ib(default=None)
192
192
193
193
194 def parse_index_v1(data, inline):
194 def parse_index_v1(data, inline):
195 # call the C implementation to parse the index data
195 # call the C implementation to parse the index data
196 index, cache = parsers.parse_index2(data, inline)
196 index, cache = parsers.parse_index2(data, inline)
197 return index, cache
197 return index, cache
198
198
199
199
200 def parse_index_v2(data, inline):
200 def parse_index_v2(data, inline):
201 # call the C implementation to parse the index data
201 # call the C implementation to parse the index data
202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203 return index, cache
203 return index, cache
204
204
205
205
206 def parse_index_cl_v2(data, inline):
206 def parse_index_cl_v2(data, inline):
207 # call the C implementation to parse the index data
207 # call the C implementation to parse the index data
208 assert not inline
208 assert not inline
209 from .pure.parsers import parse_index_cl_v2
209 from .pure.parsers import parse_index_cl_v2
210
210
211 index, cache = parse_index_cl_v2(data)
211 index, cache = parse_index_cl_v2(data)
212 return index, cache
212 return index, cache
213
213
214
214
215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216
216
217 def parse_index_v1_nodemap(data, inline):
217 def parse_index_v1_nodemap(data, inline):
218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 return index, cache
219 return index, cache
220
220
221
221
222 else:
222 else:
223 parse_index_v1_nodemap = None
223 parse_index_v1_nodemap = None
224
224
225
225
226 def parse_index_v1_mixed(data, inline):
226 def parse_index_v1_mixed(data, inline):
227 index, cache = parse_index_v1(data, inline)
227 index, cache = parse_index_v1(data, inline)
228 return rustrevlog.MixedIndex(index), cache
228 return rustrevlog.MixedIndex(index), cache
229
229
230
230
231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 # signed integer)
232 # signed integer)
233 _maxentrysize = 0x7FFFFFFF
233 _maxentrysize = 0x7FFFFFFF
234
234
235 PARTIAL_READ_MSG = _(
236 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
237 )
238
239 FILE_TOO_SHORT_MSG = _(
235 FILE_TOO_SHORT_MSG = _(
240 b'cannot read from revlog %s;'
236 b'cannot read from revlog %s;'
241 b' expected %d bytes from offset %d, data size is %d'
237 b' expected %d bytes from offset %d, data size is %d'
242 )
238 )
243
239
244
240
245 class revlog(object):
241 class revlog(object):
246 """
242 """
247 the underlying revision storage object
243 the underlying revision storage object
248
244
249 A revlog consists of two parts, an index and the revision data.
245 A revlog consists of two parts, an index and the revision data.
250
246
251 The index is a file with a fixed record size containing
247 The index is a file with a fixed record size containing
252 information on each revision, including its nodeid (hash), the
248 information on each revision, including its nodeid (hash), the
253 nodeids of its parents, the position and offset of its data within
249 nodeids of its parents, the position and offset of its data within
254 the data file, and the revision it's based on. Finally, each entry
250 the data file, and the revision it's based on. Finally, each entry
255 contains a linkrev entry that can serve as a pointer to external
251 contains a linkrev entry that can serve as a pointer to external
256 data.
252 data.
257
253
258 The revision data itself is a linear collection of data chunks.
254 The revision data itself is a linear collection of data chunks.
259 Each chunk represents a revision and is usually represented as a
255 Each chunk represents a revision and is usually represented as a
260 delta against the previous chunk. To bound lookup time, runs of
256 delta against the previous chunk. To bound lookup time, runs of
261 deltas are limited to about 2 times the length of the original
257 deltas are limited to about 2 times the length of the original
262 version data. This makes retrieval of a version proportional to
258 version data. This makes retrieval of a version proportional to
263 its size, or O(1) relative to the number of revisions.
259 its size, or O(1) relative to the number of revisions.
264
260
265 Both pieces of the revlog are written to in an append-only
261 Both pieces of the revlog are written to in an append-only
266 fashion, which means we never need to rewrite a file to insert or
262 fashion, which means we never need to rewrite a file to insert or
267 remove data, and can use some simple techniques to avoid the need
263 remove data, and can use some simple techniques to avoid the need
268 for locking while reading.
264 for locking while reading.
269
265
270 If checkambig, indexfile is opened with checkambig=True at
266 If checkambig, indexfile is opened with checkambig=True at
271 writing, to avoid file stat ambiguity.
267 writing, to avoid file stat ambiguity.
272
268
273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 index will be mmapped rather than read if it is larger than the
270 index will be mmapped rather than read if it is larger than the
275 configured threshold.
271 configured threshold.
276
272
277 If censorable is True, the revlog can have censored revisions.
273 If censorable is True, the revlog can have censored revisions.
278
274
279 If `upperboundcomp` is not None, this is the expected maximal gain from
275 If `upperboundcomp` is not None, this is the expected maximal gain from
280 compression for the data content.
276 compression for the data content.
281
277
282 `concurrencychecker` is an optional function that receives 3 arguments: a
278 `concurrencychecker` is an optional function that receives 3 arguments: a
283 file handle, a filename, and an expected position. It should check whether
279 file handle, a filename, and an expected position. It should check whether
284 the current position in the file handle is valid, and log/warn/fail (by
280 the current position in the file handle is valid, and log/warn/fail (by
285 raising).
281 raising).
286
282
287 See mercurial/revlogutils/constants.py for details about the content of an
283 See mercurial/revlogutils/constants.py for details about the content of an
288 index entry.
284 index entry.
289 """
285 """
290
286
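As a hedged illustration of the retrieval API described above (assuming a Mercurial repository in the current directory):

    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'.')
    cl = repo.changelog                  # a revlog subclass
    node = cl.node(0)                    # nodeid of revision 0
    text = cl.revision(node)             # O(size of revision), not O(history)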
291 _flagserrorclass = error.RevlogError
287 _flagserrorclass = error.RevlogError
292
288
293 def __init__(
289 def __init__(
294 self,
290 self,
295 opener,
291 opener,
296 target,
292 target,
297 radix,
293 radix,
298 postfix=None, # only exist for `tmpcensored` now
294 postfix=None, # only exist for `tmpcensored` now
299 checkambig=False,
295 checkambig=False,
300 mmaplargeindex=False,
296 mmaplargeindex=False,
301 censorable=False,
297 censorable=False,
302 upperboundcomp=None,
298 upperboundcomp=None,
303 persistentnodemap=False,
299 persistentnodemap=False,
304 concurrencychecker=None,
300 concurrencychecker=None,
305 trypending=False,
301 trypending=False,
306 ):
302 ):
307 """
303 """
308 create a revlog object
304 create a revlog object
309
305
310 opener is a function that abstracts the file opening operation
306 opener is a function that abstracts the file opening operation
311 and can be used to implement COW semantics or the like.
307 and can be used to implement COW semantics or the like.
312
308
313 `target`: a (KIND, ID) tuple that identify the content stored in
309 `target`: a (KIND, ID) tuple that identify the content stored in
314 this revlog. It help the rest of the code to understand what the revlog
310 this revlog. It help the rest of the code to understand what the revlog
315 is about without having to resort to heuristic and index filename
311 is about without having to resort to heuristic and index filename
316 analysis. Note: that this must be reliably be set by normal code, but
312 analysis. Note: that this must be reliably be set by normal code, but
317 that test, debug, or performance measurement code might not set this to
313 that test, debug, or performance measurement code might not set this to
318 accurate value.
314 accurate value.
319 """
315 """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

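For orientation, a runnable sketch of the two `target` assertions above; the kind values here are illustrative stand-ins (the real ones live in mercurial/revlogutils/constants.py):

ALL_KINDS = {b'changelog', b'manifestlog', b'filelog'}  # illustrative stand-ins

def check_target(target):
    # Mirrors the two assertions in __init__: a (KIND, ID) pair whose
    # kind is one the rest of the code knows how to interpret.
    assert len(target) == 2
    assert target[0] in ALL_KINDS
    return target

check_target((b'filelog', b'data/foo.txt'))
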
    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
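
The `chunkcachesize` validation above relies on the usual bitwise power-of-two test; a minimal runnable sketch:

def is_power_of_two(n):
    # A positive integer is a power of two iff exactly one bit is set;
    # n - 1 flips every bit at or below that bit, so the AND is zero.
    return n > 0 and n & (n - 1) == 0

assert is_power_of_two(65536)      # the default chunk cache size
assert not is_power_of_two(65535)  # rejected with a RevlogError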

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
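
A standalone sketch of the same read-vs-mmap decision, assuming a plain filesystem path rather than a Mercurial vfs opener (corner cases such as empty or missing files are omitted):

import mmap
import os

def read_possibly_mmapped(path, threshold):
    # Small files are read outright; files at or above the threshold
    # are memory-mapped so the OS can page the index in lazily.
    with open(path, 'rb') as fp:
        if threshold is not None and os.fstat(fp.fileno()).st_size >= threshold:
            return memoryview(mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ))
        return fp.read()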

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
-           index, _chunkcache = d
+           index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
-       self.index, self._chunkcache = d
-       if not self._chunkcache:
-           self._chunkclear()
+       self.index = index
+       self._segmentfile = randomaccessfile.randomaccessfile(
+           self.opener,
+           (self._indexfile if self._inline else self._datafile),
+           self._chunkcachesize,
+           chunkcache,
+       )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}
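
The version/flag split performed in `_loadindex` packs both into one 32-bit header word; a runnable illustration (flag values match the REVLOGV1 constants):

REVLOGV1 = 1                 # low 16 bits: format version
FLAG_INLINE_DATA = 1 << 16   # high 16 bits: per-format feature flags
FLAG_GENERALDELTA = 1 << 17

header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
assert header & 0xFFFF == REVLOGV1       # what _format_version keeps
assert (header & ~0xFFFF) >> 16 == 0b11  # what _format_flags displays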

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
-   def _datareadfp(self, existingfp=None):
-       """file object suitable to read data"""
-       # Use explicit file handle, if given.
-       if existingfp is not None:
-           yield existingfp
-
-       # Use a file handle being actively used for writes, if available.
-       # There is some danger to doing this because reads will seek the
-       # file. However, _writeentry() performs a SEEK_END before all writes,
-       # so we should be safe.
-       elif self._writinghandles:
-           if self._inline:
-               yield self._writinghandles[0]
-           else:
-               yield self._writinghandles[1]
-
-       # Otherwise open a new file handle.
-       else:
-           if self._inline:
-               func = self._indexfp
-           else:
-               func = self._datafp
-           with func() as fp:
-               yield fp
-
-   @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp
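
The removed `_datareadfp` (its logic now lives behind the `randomaccessfile` object wired up in `_loadindex`) and the surviving `_sidedatareadfp` share one pattern: borrow an already-open handle when possible, otherwise open and reliably close a fresh one. A generic sketch of that pattern:

import contextlib

@contextlib.contextmanager
def borrowed_or_fresh_handle(existing, open_fn):
    # Yield a caller-supplied handle untouched, or open a new one and
    # guarantee it is closed when the read is done.
    if existing is not None:
        yield existing
    else:
        with open_fn() as fp:
            yield fp

# usage: with borrowed_or_fresh_handle(None, lambda: open('x.d', 'rb')) as fp: ...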

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
-       self._chunkcache = (0, b'')
+       self._segmentfile.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
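
The comment above describes the packing that `start()` and `flags()` undo; concretely:

entry0 = (1234 << 16) | 0x0001    # offset 1234 with one flag bit set
assert entry0 >> 16 == 1234       # start(): drop the 2 flag bytes
assert entry0 & 0xFFFF == 0x0001  # flags(): keep only the 2 flag bytes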

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
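
A pure-Python sketch of the chain walk used by both `_chaininfo` and the fallback path of `_deltachain` above; `base_of` is a hypothetical accessor for the index's base-revision field:

def delta_chain(base_of, rev, generaldelta):
    # Follow base pointers (generaldelta) or simply rev - 1 (legacy
    # layout) until a revision is its own base, i.e. a full snapshot.
    chain = []
    while base_of(rev) != rev:
        chain.append(rev)
        rev = base_of(rev) if generaldelta else rev - 1
    chain.append(rev)
    chain.reverse()
    return chain

# bases: rev 0 is a snapshot; revs 1 and 2 delta against their base
assert delta_chain({0: 0, 1: 0, 2: 1}.__getitem__, 2, True) == [0, 1, 2]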

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)
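
Constraints (1) and (2) above amount to a set difference between two inclusive ancestor sets. A tiny runnable reference sketch (`parents` is a hypothetical rev -> parent-revs callable, with -1 standing in for the null revision):

def missing_ancestors(parents, common, heads):
    # Ancestors of heads (inclusive) minus ancestors of common (inclusive).
    def ancestors(revs):
        seen = set()
        stack = list(revs)
        while stack:
            r = stack.pop()
            if r >= 0 and r not in seen:
                seen.add(r)
                stack.extend(parents(r))
        return seen
    return sorted(ancestors(heads) - ancestors(common))

# graph 0 <- 1 <- 2 and 1 <- 3; common=[1], heads=[2, 3] -> [2, 3]
assert missing_ancestors(
    {0: [-1], 1: [0], 2: [1], 3: [1]}.__getitem__, [1], [2, 3]
) == [2, 3]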

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
1178
1152
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant.  (We seeded the descendants set with the
                # roots up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

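    # Illustrative sketch (assumes ``rl`` is a revlog and ``a``, ``b`` are
    # stored binary node ids with ``a`` an ancestor of ``b``):
    #
    #     nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[b])
    #     # nodes is topologically sorted: nodes[0] == a, nodes[-1] == b
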
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

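    # Illustrative sketch (``rl`` assumed): with no argument the C/Rust
    # index implementation is preferred, falling back to the pure-Python
    # ``_headrevs`` below; with a subset, e.g. ``rl.headrevs([0, 1, 2])``,
    # only heads within those revisions are returned.
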
    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

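    # Worked example of the marking scheme above: for a linear history
    # 0 <- 1 <- 2, every revision first marks itself (ishead[r] = 1) and
    # then clears its parents' slots, so slots 0 and 1 end up cleared and
    # only [2] is returned.  The list has one extra slot so that a null
    # parent (nullrev == -1) harmlessly clears ishead[-1].
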
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

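    # Illustrative sketch (``rl`` and a stored ``node`` assumed):
    #
    #     rl.heads()            # every node without children
    #     rl.heads(start=node)  # only heads descending from ``node``
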
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

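    # Illustrative sketch (``rl`` and a stored ``node`` assumed):
    #
    #     kids = rl.children(node)  # [] when ``node`` is a head
    #
    # Passing nullid lists the revlog's root revisions, via the
    # ``p == nullrev`` branch above.
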
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

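    # Illustrative sketch of the fast paths above (``rl`` assumed, plain
    # revision numbers):
    #
    #     rl.isancestorrev(nullrev, r)  # True: nullrev precedes everything
    #     rl.isancestorrev(r, r)        # True: a rev is its own ancestor
    #     rl.isancestorrev(5, 3)        # False without any DAG walk: 5 > 3
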
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

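    # Illustrative sketch (``rl`` and stored nodes ``a``, ``b`` assumed):
    #
    #     gca = rl.ancestor(a, b)  # nullid when nothing is in common
    #
    # Ties between several "best" ancestors are broken by picking the
    # smallest binary node, keeping repeated calls deterministic.
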
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

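    # Illustrative sketch (``rl`` assumed; ``id`` is a hex prefix as bytes):
    #
    #     node = rl._partialmatch(b'1234ab')  # a node, or None when absent
    #
    # An ambiguous prefix raises AmbiguousPrefixLookupError, and a prefix
    # made only of 'f's that matches nothing raises WdirUnsupported.
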
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

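    # Illustrative sketch (``rl`` assumed):
    #
    #     rl.lookup(b'0')   # b'0' parses as str(rev) -> node of revision 0
    #     rl.lookup(node)   # a full binary node id is returned as-is
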
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

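    # Illustrative sketch (``rl`` and a stored ``node`` assumed):
    #
    #     prefix = rl.shortest(node, minlength=4)
    #     assert rl.lookup(prefix) == node  # the prefix is unambiguous
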
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

-    def _cachesegment(self, offset, data):
-        """Add a segment to the revlog cache.
-
-        Accepts an absolute offset and the data that is at that location.
-        """
-        o, d = self._chunkcache
-        # try to add to existing cache
-        if o + len(d) == offset and len(d) + len(data) < _chunksize:
-            self._chunkcache = o, d + data
-        else:
-            self._chunkcache = offset, data
-
-    def _readsegment(self, offset, length, df=None):
-        """Load a segment of raw data from the revlog.
-
-        Accepts an absolute offset, length to read, and an optional existing
-        file handle to read from.
-
-        If an existing file handle is passed, it will be seeked and the
-        original seek position will NOT be restored.
-
-        Returns a str or buffer of raw byte data.
-
-        Raises if the requested number of bytes could not be read.
-        """
-        # Cache data both forward and backward around the requested
-        # data, in a fixed size window. This helps speed up operations
-        # involving reading the revlog backwards.
-        cachesize = self._chunkcachesize
-        realoffset = offset & ~(cachesize - 1)
-        reallength = (
-            (offset + length + cachesize) & ~(cachesize - 1)
-        ) - realoffset
-        with self._datareadfp(df) as df:
-            df.seek(realoffset)
-            d = df.read(reallength)
-
-        self._cachesegment(realoffset, d)
-        if offset != realoffset or reallength != length:
-            startoffset = offset - realoffset
-            if len(d) - startoffset < length:
-                filename = self._indexfile if self._inline else self._datafile
-                got = len(d) - startoffset
-                m = PARTIAL_READ_MSG % (filename, length, offset, got)
-                raise error.RevlogError(m)
-            return util.buffer(d, startoffset, length)
-
-        if len(d) < length:
-            filename = self._indexfile if self._inline else self._datafile
-            got = len(d)
-            m = PARTIAL_READ_MSG % (filename, length, offset, got)
-            raise error.RevlogError(m)
-
-        return d
-
-    def _getsegment(self, offset, length, df=None):
-        """Obtain a segment of raw data from the revlog.
-
-        Accepts an absolute offset, length of bytes to obtain, and an
-        optional file handle to the already-opened revlog. If the file
-        handle is used, its original seek position will not be preserved.
-
-        Requests for data may be returned from a cache.
-
-        Returns a str or a buffer instance of raw byte data.
-        """
-        o, d = self._chunkcache
-        l = len(d)
-
-        # is it in the cache?
-        cachestart = offset - o
-        cacheend = cachestart + length
-        if cachestart >= 0 and cacheend <= l:
-            if cachestart == 0 and cacheend == l:
-                return d  # avoid a copy
-            return util.buffer(d, cachestart, cacheend - cachestart)
-
-        return self._readsegment(offset, length, df=df)
-
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

-        return start, self._getsegment(start, length, df=df)
+        return start, self._segmentfile.read_chunk(start, length, df)

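    # Worked example for the inline layout above (hypothetical numbers,
    # assuming a 64-byte index entry size): for rev 2 with start(2) == 100
    # and length(2) == 30, the data segment of an inline revlog begins at
    # 100 + (2 + 1) * 64 == 292 and ends at 292 + 30 == 322, because each
    # revision's index entry is stored immediately before its data.
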
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

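    # Illustrative sketch (``rl`` assumed): fetching a whole delta chain in
    # one read, then slicing per revision:
    #
    #     chain, _stopped = rl._deltachain(rev)
    #     bins = rl._chunks(chain)  # one decompressed chunk per revision
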
-    def _chunkclear(self):
-        """Clear the raw chunk cache."""
-        self._chunkcache = (0, b'')
-
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

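    # Illustrative note: with sparse-revlog, a snapshot is either a full
    # text (its delta base is nullrev or itself) or a delta whose base is
    # not one of its parents but is itself a snapshot, hence the recursive
    # call above.
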
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

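    # Illustrative sketch (``rl`` assumed): when rev2 is already stored as
    # a delta against rev1 the stored chunk is reused verbatim; otherwise a
    # fresh binary diff is computed from the two raw texts:
    #
    #     delta = rl.revdiff(rev1, rev2)
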
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

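    # Illustrative sketch of the reconstruction above (``rl`` and a stored
    # ``node`` assumed):
    #
    #     rev, rawtext, validated = rl._rawtext(node, None)
    #     # rawtext is mdiff.patches(basetext, deltas) unless it came
    #     # straight from the cache, in which case validated is True
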
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        # XXX this needs caching, as we do for data
        with self._sidedatareadfp() as sdf:
            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
                filename = self._sidedatafile
                end = self._docket.sidedata_end
                offset = sidedata_offset
                length = sidedata_size
                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
                raise error.RevlogError(m)

            sdf.seek(sidedata_offset, os.SEEK_SET)
            comp_segment = sdf.read(sidedata_size)

            if len(comp_segment) < sidedata_size:
                filename = self._sidedatafile
                length = sidedata_size
                offset = sidedata_offset
                got = len(comp_segment)
-                m = PARTIAL_READ_MSG % (filename, length, offset, got)
+                m = randomaccessfile.PARTIAL_READ_MSG % (
+                    filename,
+                    length,
+                    offset,
+                    got,
+                )
                raise error.RevlogError(m)

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

                tr.replace(self._indexfile, trindex * self.index.entry_size)
                nodemaputil.setup_persistent_nodemap(tr, self)
                self._segmentfile = randomaccessfile.randomaccessfile(
                    self.opener,
                    self._datafile,
                    self._chunkcachesize,
                )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

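    # Layout sketch of the conversion performed above (illustrative, not new
    # behavior): an inline revlog interleaves each data chunk with its index
    # entry in the ``.i`` file,
    #
    #     .i: [entry 0][data 0][entry 1][data 1] ...
    #
    # while the split form rewrites this as a fixed-stride index plus a
    # separate data file:
    #
    #     .i: [entry 0][entry 1] ...    .d: [data 0][data 1] ...
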
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # expose all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

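    # Illustrative usage (hypothetical caller; ``rl`` and ``tr`` stand for a
    # revlog and an open transaction): all mutation happens under this
    # context manager so file handles and docket offsets stay consistent:
    #
    #     with rl._writing(tr):
    #         rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # Re-entering is cheap: if handles are already open, _writing() simply
    # yields without reopening anything.
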
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

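    # A note on idempotency (it follows from the get_rev() check above):
    # adding the same content twice returns the existing revision number
    # rather than storing a duplicate, e.g.
    #
    #     r1 = rl.addrevision(text, tr, link, p1, p2)  # rl, tr hypothetical
    #     r2 = rl.addrevision(text, tr, link, p1, p2)
    #     assert r1 == r2
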
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

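    # Header convention used by compress() (and consumed by decompress()
    # below): the first element of the returned pair is a one-byte prefix
    # for the stored chunk:
    #
    #   b''  - payload is engine-compressed and self-identifying (e.g. b'x'
    #          for zlib), or already starts with b'\0' and is stored raw
    #   b'u' - payload is literal uncompressed data; the prefix is stripped
    #          on read
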
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

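    # Round-trip sketch (assuming ``rl`` is a revlog instance): the pair
    # returned by compress(), once concatenated, is exactly what
    # decompress() accepts:
    #
    #     header, payload = rl.compress(chunk)
    #     assert bytes(rl.decompress(header + payload)) == chunk
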
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

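    # Summary of the compression modes recorded in the index entry above:
    #
    #   COMP_MODE_PLAIN   - chunk is stored verbatim; nothing to strip
    #   COMP_MODE_DEFAULT - compressed with the docket's default engine; the
    #                       engine header is elided from storage
    #   COMP_MODE_INLINE  - self-describing chunk whose first byte routes it
    #                       to a decompressor (see decompress() above)
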
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

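    # Shape of each element consumed from ``deltas`` above, as unpacked at
    # the top of the loop:
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # ``delta`` is a binary patch against ``deltabase``, which must already
    # be known to the revlog (possibly added earlier in the same group).
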
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()

        del self.index[rev:-1]

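    # Worked example (a sketch): with linkrevs [0, 3, 1] and minlink=2, the
    # first revision that must go is rev 1, so the revlog is truncated to a
    # single revision; rev 2 (linkrev 1) is collateral damage that the
    # caller is expected to have saved and to re-add afterwards.
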
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
                dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

2977 def clone(
2881 def clone(
2978 self,
2882 self,
2979 tr,
2883 tr,
2980 destrevlog,
2884 destrevlog,
2981 addrevisioncb=None,
2885 addrevisioncb=None,
2982 deltareuse=DELTAREUSESAMEREVS,
2886 deltareuse=DELTAREUSESAMEREVS,
2983 forcedeltabothparents=None,
2887 forcedeltabothparents=None,
2984 sidedata_helpers=None,
2888 sidedata_helpers=None,
2985 ):
2889 ):
2986 """Copy this revlog to another, possibly with format changes.
2890 """Copy this revlog to another, possibly with format changes.
2987
2891
        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. By default, the destination revlog's existing
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
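
        A minimal illustrative call, assuming ``rl`` and ``dest`` are open
        revlogs and ``tr`` is an active transaction::

            rl.clone(tr, dest, deltareuse=rl.DELTAREUSESAMEREVS)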
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
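            # Map the requested policy onto the two lazy-delta knobs:
            # DELTAREUSEALWAYS trusts both the cached delta and its base,
            # DELTAREUSESAMEREVS reuses a delta only when the destination
            # would pick the same revisions, and DELTAREUSENEVER recomputes
            # everything.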
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
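            # Index entry layout, as used below: entry[0] packs the data
            # offset and flags, entry[4] is the link revision, entry[5] and
            # entry[6] are the parent revisions, and entry[7] is the node id.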
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
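                    # `new_flags` is a pair of (flags to add, flags to
                    # remove) computed by the sidedata helpers.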
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            # revlog v2
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
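
        A minimal illustrative driver, assuming ``rl`` is an open revlog
        (only the ``state`` keys read by this method are shown)::

            state = {b'expectedversion': 1, b'erroroncensored': True}
            for problem in rl.verifyintegrity(state):
                print(problem.warning or problem.error)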
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
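        # The returned dict has one key per boolean argument that was set,
        # e.g. storageinfo(revisionscount=True, trackedsize=True) returns
        # {b'revisionscount': ..., b'trackedsize': ...}.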
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

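                # Decide how this sidedata chunk is stored: uncompressed
                # (COMP_MODE_PLAIN), compressed with the docket's default
                # compression header (COMP_MODE_DEFAULT), or with the
                # compression header kept inline with the data
                # (COMP_MODE_INLINE).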
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        # slice rather than index so the comparison operates
                        # on a bytes object under Python 3
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
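                # entry[8] and entry[9] hold the sidedata offset and length
                # already recorded for this revision.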
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)