revlog: remove pycompat.iteritems()...
Gregory Szorc
r49783:ceafb0f8 default
@@ -1,3309 +1,3307 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note: this must be reliably set by normal code, but test,
        debug, or performance measurement code might not set it to an
        accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
-        for flag, processor in pycompat.iteritems(
-            opts.get(b'flagprocessors', {})
-        ):
+        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
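    # Editor's note: an illustrative sketch, not part of revlog.py, showing
    # why this changeset can drop pycompat.iteritems(). On Python 3,
    # dict.items() returns a lazy view, so the two spellings iterate
    # identically (hypothetical toy processor mapping below):
    #
    #     processors = {1 << 15: lambda rl, text: (text, False)}
    #     for flag, proc in processors.items():  # was pycompat.iteritems(processors)
    #         assert callable(proc)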

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
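    # Editor's note: an illustrative sketch, not part of revlog.py. The same
    # read-vs-mmap decision written against the stdlib (hypothetical helper;
    # the method above goes through self.opener and util.mmapread instead):
    #
    #     import mmap, os
    #
    #     def read_maybe_mmap(path, threshold):
    #         with open(path, 'rb') as fp:
    #             size = os.fstat(fp.fileno()).st_size
    #             if threshold is not None and size >= threshold:
    #                 return mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
    #             return fp.read()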

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

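    # Editor's note: an illustrative sketch, not part of revlog.py, of the
    # header decoding done in _loadindex(). INDEX_HEADER is a big-endian
    # 32-bit struct; the low 16 bits carry the format version and the high
    # 16 bits carry the format flags:
    #
    #     import struct
    #
    #     header = struct.unpack('>I', b'\x00\x03\x00\x01')[0]
    #     version = header & 0xFFFF  # 1, i.e. REVLOGV1
    #     flags = header & ~0xFFFF   # 0x30000: inline data + general delta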
    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket;
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
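    # Editor's note: an illustrative sketch, not part of revlog.py, of the
    # offset/flags packing described in the comment above (6 bytes of offset,
    # 2 bytes of flags in the first index-entry field):
    #
    #     offset_flags = (1234 << 16) | 0x8000  # offset 1234, one flag bit set
    #     assert offset_flags >> 16 == 1234       # what start() extracts
    #     assert offset_flags & 0xFFFF == 0x8000  # what flags() extracts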

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank
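    # Editor's note: an illustrative sketch, not part of revlog.py. The rank
    # defined in the docstring above, computed the slow way over a toy
    # rev -> parents mapping (fast_rank() only ever reads a stored value):
    #
    #     def slow_rank(parents, rev):
    #         seen, stack = set(), [rev]
    #         while stack:
    #             r = stack.pop()
    #             if r >= 0 and r not in seen:  # -1 stands in for nullrev
    #                 seen.add(r)
    #                 stack.extend(parents[r])
    #         return len(seen)  # |ancestors(rev)|, rev included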

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

926 def chainlen(self, rev):
924 def chainlen(self, rev):
927 return self._chaininfo(rev)[0]
925 return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
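
    # Illustrative sketch, not part of the original source: ``_chaininfo``
    # answers "how many deltas must be applied, and how many compressed bytes
    # must be read, to reconstruct this revision?". Hypothetical usage,
    # assuming ``rl`` is an open revlog and ``tiprev`` a valid revision:
    #
    #   chainlen, compressed = rl._chaininfo(tiprev)
    #   assert chainlen == rl.chainlen(tiprev)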

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
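
    # Hypothetical usage sketch of the contract above, assuming ``rl`` is an
    # open revlog and ``rev`` a valid revision:
    #
    #   chain, stopped = rl._deltachain(rev)
    #   assert chain == sorted(chain)  # ascending order, base first
    #   assert not stopped             # no stoprev given, walked to the base
    #
    #   chain2, stopped2 = rl._deltachain(rev, stoprev=chain[0])
    #   assert stopped2                # the base itself is excluded this time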

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
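
    # Hypothetical usage sketch: both back-ends selected above return a lazy
    # iterable, so ancestors are only computed as the caller consumes them.
    # Assuming ``rl`` is an open revlog and ``tiprev`` a valid revision:
    #
    #   for r in rl.ancestors([tiprev], inclusive=True):
    #       ...  # yields tiprev itself too, since inclusive=True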

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
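
    # Worked example of the revset identity above (hypothetical): on a linear
    # history 0..4 with common=[node(2)] and heads=[node(4)]:
    #
    #   has      ~ {nullrev, 0, 1, 2}   # ::common as revs, wrapped in lazyset
    #   missing == [node(3), node(4)]   # (::heads) - (::common), sorted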

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
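
    # Hypothetical usage sketch: the returned object keeps its own notion of
    # the common set, so callers can feed it heads in batches and (assuming
    # the back-end's ``addbases`` method) grow the bases between calls:
    #
    #   inc = rl.incrementalmissingrevs(common=[2])
    #   missing = inc.missingancestors([4])  # e.g. [3, 4] on a linear history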

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
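
    # Hypothetical summary of the three related queries above: they compute
    # the same set and differ only in their value space.
    #
    #   findcommonmissing -> (ancestor set of common, missing node IDs)
    #   findmissingrevs   -> missing revision numbers (revs in, revs out)
    #   findmissing       -> missing node IDs (nodes in, nodes out)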

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
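
    # Worked example (hypothetical): on a linear history 0..4 with
    # roots=[node(1)] and heads=[node(3)], nodesbetween returns
    #
    #   ([node(1), node(2), node(3)],  # 1::3, topologically sorted
    #    [node(1)],                    # the reachable subset of roots
    #    [node(3)])                    # the reachable subset of heads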

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
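
    # Hypothetical trace of the pure-Python fallback above, for the linear
    # history 0 <- 1 <- 2: every rev first marks itself as a head, then each
    # rev clears the flag on its parents, leaving only childless revs set.
    # The extra trailing slot in ``ishead`` absorbs nullrev (-1) parents so
    # they never shadow a real revision.
    #
    #   after the pass: ishead == [0, 0, 1, 0]  ->  heads == [2]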

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
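
    # Why the reachableroots call above answers the question (hypothetical
    # reading): with roots=[a] and heads=[b], a non-empty result means some
    # parent path from b reaches a, i.e. a is an ancestor of b. The ``a > b``
    # fast path is sound because a parent always has a smaller revision
    # number than any of its descendants, e.g.:
    #
    #   rl.isancestorrev(2, 4)  # True on a linear history 0..4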

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))
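
    # Hypothetical usage of the resolution order above: exact matches via
    # _match (integer rev, b"<rev>", binary node, full hex) win over the
    # prefix search in _partialmatch. Assuming ``rl`` is an open revlog and
    # ``node`` a stored 20-byte node:
    #
    #   rl.lookup(0)                 # rev number -> node(0)
    #   rl.lookup(node)              # binary node -> returned as-is
    #   rl.lookup(hex(node)[:6])     # unambiguous hex prefix -> full node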

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
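
    # Hypothetical example of the contract above: the result is the shortest
    # hex prefix that is unambiguous among stored nodes and cannot be
    # mistaken for the all-'f' wdir pseudo-identifier.
    #
    #   prefix = rl.shortest(node, minlength=1)
    #   assert rl.lookup(prefix) == node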

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)
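
    # Worked arithmetic for the inline case above (hypothetical numbers):
    # with entry_size == 64 and start(r) == 100, revision r's data really
    # lives at 100 + (r + 1) * 64, because an inline revlog interleaves
    # index entries with data and r + 1 entries precede that chunk.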

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
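
    # Hypothetical reading of the pure fallback above: a full text
    # (base == rev) or a delta against nothing (base == nullrev) is a
    # level-0 snapshot; a delta against one of the revision's own parents
    # is a regular delta; and a delta against another snapshot is an
    # intermediate snapshot, hence the recursion into issnapshot(base).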

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text
1846
1844
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

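    # Illustrative sketch of the chain resolution above (hypothetical values,
    # not real revlog chunks): the first chunk of a delta chain is a full
    # text and every later chunk is a binary delta applied on top of it:
    #
    #     chunks = [b'full base text', delta1, delta2]
    #     rawtext = mdiff.patches(bytes(chunks[0]), chunks[1:])
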
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

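    # Sketch of the per-entry compression mode routing above, using the
    # constants imported at the top of this module:
    #
    #     COMP_MODE_PLAIN   -> stored as-is, no decompression needed
    #     COMP_MODE_DEFAULT -> decompress with the docket's default engine
    #     COMP_MODE_INLINE  -> the chunk embeds its own header byte;
    #                          decompress() routes on that first byte
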
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

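    # A minimal sketch of what storageutil.hashrevisionsha1 computes: the
    # SHA-1 of the two parent nodes in sorted order followed by the text.
    # `p1`, `p2` and `text` are hypothetical bytes values:
    #
    #     import hashlib
    #     s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    #     s.update(text)
    #     node = s.digest()
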
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = None
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant for revlog-v2, which is never inline and never reaches
            # this code.

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if (
                        trindex is None
                        and troffset
                        <= self.start(r) + r * self.index.entry_size
                    ):
                        trindex = r
                new_dfh.flush()

            if trindex is None:
                trindex = 0

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant for revlog-v2, which is never inline and never
                # reaches this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

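    # Illustrative usage sketch (assuming `rl` is an open revlog): keeping the
    # underlying files open across many reads avoids reopening them once per
    # revision:
    #
    #     with rl.reading():
    #         texts = [rl.revision(r) for r in rl]
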
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

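    # The (header, data) contract of compress(), sketched with hypothetical
    # input (assuming `rl` is an open revlog):
    #
    #     header, packed = rl.compress(b'some revision text')
    #     chunk = header + packed  # what actually lands in the revlog
    #     # header == b''  -> `packed` already embeds its compression header
    #     # header == b'u' -> data is stored uncompressed, marked by b'u'
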
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

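    # First-byte routing used above, sketched:
    #
    #     b'x'  -> zlib stream (zlib deflate output begins with 0x78)
    #     b'\0' -> stored raw, returned as-is
    #     b'u'  -> stored raw with an explicit marker; strip the first byte
    #     other -> look up a registered engine via _get_decompressor()
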
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._format_version == CHANGELOGV2:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case. We're only using addgroup() in the context of
                    # changegroup generation so the revision data can always
                    # be handled as raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

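    # Shape of one item consumed by addgroup(), sketched (node ids are bytes,
    # `delta` is a binary delta against `deltabase`, `flags` is an int, and
    # `sidedata` is a dict):
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
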
2711 def iscensored(self, rev):
2709 def iscensored(self, rev):
2712 """Check if a file revision is censored."""
2710 """Check if a file revision is censored."""
2713 if not self._censorable:
2711 if not self._censorable:
2714 return False
2712 return False
2715
2713
2716 return self.flags(rev) & REVIDX_ISCENSORED
2714 return self.flags(rev) & REVIDX_ISCENSORED
2717
2715
2718 def _peek_iscensored(self, baserev, delta):
2716 def _peek_iscensored(self, baserev, delta):
2719 """Quickly check if a delta produces a censored revision."""
2717 """Quickly check if a delta produces a censored revision."""
2720 if not self._censorable:
2718 if not self._censorable:
2721 return False
2719 return False
2722
2720
2723 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2721 return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )
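
    # Usage sketch (not part of the original file): strip() below is the
    # in-tree consumer of getstrippoint(); given a revlog `rl` and the first
    # stripped changelog revision `minlink`:
    #
    #     rev, brokenrevs = rl.getstrippoint(minlink)
    #
    # `rev` is the first local revision to truncate away, and `brokenrevs`
    # holds revisions below `rev` whose linkrevs point at stripped changesets
    # and therefore must be saved and re-added by the caller.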

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]
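
    # Worked example (illustrative, not part of the original file): assuming
    # the 64-byte v1 index entry size, stripping at rev == 3 truncates a
    # non-inline index at 3 * 64 == 192 bytes and the data file at
    # self.start(3). For an inline revlog, index entries and data chunks
    # share one file, so the cut-off folds both together:
    #
    #     end = self.start(3) + 3 * self.index.entry_size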

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
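
    # Interpretation sketch (not part of the original file): dd > 0 means the
    # data file holds bytes beyond what the last index entry accounts for
    # (e.g. a partially rolled-back write); di > 0 means the index size is not
    # a whole multiple of entry_size or, for inline revlogs, that index plus
    # data do not add up. verifyintegrity() below consumes this directly:
    #
    #     dd, di = self.checksize()
    #     # report non-zero dd/di as revlogproblem instances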

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        a better delta base could be chosen, the delta will be recomputed.
        This means if you are converting a non-generaldelta revlog to a
        generaldelta revlog, deltas will be recomputed if the delta's parent
        isn't a parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. When it is unset, the destination revlog's current
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
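
    # Hedged usage sketch (not part of the original file; `repo`, `src` and
    # `dst` are hypothetical names): a format upgrade typically clones each
    # revlog into a destination opened with the target format, trading delta
    # recomputation cost against fidelity via the reuse policy:
    #
    #     with repo.transaction(b'upgrade') as tr:
    #         src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
    #
    # The real in-tree consumer lives in mercurial/upgrade_utils/.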

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
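
    # Note on the flag merge in _clone() above (not part of the original
    # file): sidedata helpers return new_flags as a pair of
    # (flags_to_add, flags_to_remove) bitmasks. Since Python's `&` binds
    # tighter than `|`, the expression
    #
    #     flags = flags | new_flags[0] & ~new_flags[1]
    #
    # parses as flags | (new_flags[0] & ~new_flags[1]): the removal mask is
    # applied to the flags being added before they are OR-ed into the entry.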

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
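
    # Hypothetical consumer sketch (not part of the original file; `rl`, `ui`
    # and `state` are assumed names): `hg verify`-style reporting drains the
    # generator and inspects the revlogproblem attributes defined earlier in
    # this module:
    #
    #     for problem in rl.verifyintegrity(state):
    #         if problem.error:
    #             ui.warn(problem.error + b'\n')
    #         elif problem.warning:
    #             ui.warn(problem.warning + b'\n')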

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
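
    # Usage sketch (not part of the original file; the numbers are made up):
    # callers request only the figures they need, because some are not free.
    # trackedsize sums rawsize() over every revision and storedsize stats
    # every file returned by files():
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # e.g. {b'revisionscount': 42, b'storedsize': 16384}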

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
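
    # Note on the compression-mode decision above (a sketch, not part of the
    # original file): non-empty sidedata defaults to being stored as-is
    # (COMP_MODE_PLAIN); the compressed form is only kept when compress()
    # actually wins, i.e. it produces a strictly smaller blob that cannot be
    # mistaken for the reserved markers. When the winning blob starts with
    # the docket's default compression header, the per-chunk header can be
    # omitted on disk (COMP_MODE_DEFAULT); otherwise the engine header is
    # kept with the chunk (COMP_MODE_INLINE). Empty sidedata short-circuits
    # to COMP_MODE_INLINE with nothing written.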