upstream/mercurial-mirror Commit - r48074:93a0abe0

1

# revlog.py - storage back-end for mercurial

1

# revlog.py - storage back-end for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""Storage back-end for Mercurial.

8

"""Storage back-end for Mercurial.

9

10

This provides efficient delta storage with O(1) retrieve and append

10

This provides efficient delta storage with O(1) retrieve and append

11

and O(changes) merge between branches.

11

and O(changes) merge between branches.

12

"""

12

"""

13

14

from __future__ import absolute_import

14

from __future__ import absolute_import

15

16

import binascii

16

import binascii

17

import collections

17

import collections

18

import contextlib

18

import contextlib

19

import errno

19

import errno

20

import io

20

import io

21

import os

21

import os

22

import struct

22

import struct

23

import zlib

23

import zlib

24

25

# import stuff from node for others to import from revlog

25

# import stuff from node for others to import from revlog

26

from .node import (

26

from .node import (

27

bin,

27

bin,

28

hex,

28

hex,

29

nullrev,

29

nullrev,

30

sha1nodeconstants,

30

sha1nodeconstants,

31

short,

31

short,

32

wdirrev,

32

wdirrev,

33

)

33

)

34

from .i18n import _

34

from .i18n import _

35

from .pycompat import getattr

35

from .pycompat import getattr

36

from .revlogutils.constants import (

36

from .revlogutils.constants import (

37

ALL_KINDS,

37

ALL_KINDS,

38

CHANGELOGV2,

38

CHANGELOGV2,

39

COMP_MODE_DEFAULT,

39

COMP_MODE_DEFAULT,

40

COMP_MODE_INLINE,

40

COMP_MODE_INLINE,

41

COMP_MODE_PLAIN,

41

COMP_MODE_PLAIN,

42

FEATURES_BY_VERSION,

42

FEATURES_BY_VERSION,

43

FLAG_GENERALDELTA,

43

FLAG_GENERALDELTA,

44

FLAG_INLINE_DATA,

44

FLAG_INLINE_DATA,

45

INDEX_HEADER,

45

INDEX_HEADER,

46

KIND_CHANGELOG,

46

KIND_CHANGELOG,

47

REVLOGV0,

47

REVLOGV0,

48

REVLOGV1,

48

REVLOGV1,

49

REVLOGV1_FLAGS,

49

REVLOGV1_FLAGS,

50

REVLOGV2,

50

REVLOGV2,

51

REVLOGV2_FLAGS,

51

REVLOGV2_FLAGS,

52

REVLOG_DEFAULT_FLAGS,

52

REVLOG_DEFAULT_FLAGS,

53

REVLOG_DEFAULT_FORMAT,

53

REVLOG_DEFAULT_FORMAT,

54

REVLOG_DEFAULT_VERSION,

54

REVLOG_DEFAULT_VERSION,

55

SUPPORTED_FLAGS,

55

SUPPORTED_FLAGS,

56

)

56

)

57

from .revlogutils.flagutil import (

57

from .revlogutils.flagutil import (

58

REVIDX_DEFAULT_FLAGS,

58

REVIDX_DEFAULT_FLAGS,

59

REVIDX_ELLIPSIS,

59

REVIDX_ELLIPSIS,

60

REVIDX_EXTSTORED,

60

REVIDX_EXTSTORED,

61

REVIDX_FLAGS_ORDER,

61

REVIDX_FLAGS_ORDER,

62

REVIDX_HASCOPIESINFO,

62

REVIDX_HASCOPIESINFO,

63

REVIDX_ISCENSORED,

63

REVIDX_ISCENSORED,

64

REVIDX_RAWTEXT_CHANGING_FLAGS,

64

REVIDX_RAWTEXT_CHANGING_FLAGS,

65

)

65

)

66

from .thirdparty import attr

66

from .thirdparty import attr

67

from . import (

67

from . import (

68

ancestor,

68

ancestor,

69

dagop,

69

dagop,

70

error,

70

error,

71

mdiff,

71

mdiff,

72

policy,

72

policy,

73

pycompat,

73

pycompat,

74

templatefilters,

74

templatefilters,

75

util,

75

util,

76

)

76

)

77

from .interfaces import (

77

from .interfaces import (

78

repository,

78

repository,

79

util as interfaceutil,

79

util as interfaceutil,

80

)

80

)

81

from .revlogutils import (

81

from .revlogutils import (

82

deltas as deltautil,

82

deltas as deltautil,

83

docket as docketutil,

83

docket as docketutil,

84

flagutil,

84

flagutil,

85

nodemap as nodemaputil,

85

nodemap as nodemaputil,

86

revlogv0,

86

revlogv0,

87

sidedata as sidedatautil,

87

sidedata as sidedatautil,

88

)

88

)

89

from .utils import (

89

from .utils import (

90

storageutil,

90

storageutil,

91

stringutil,

91

stringutil,

92

)

92

)

93

94

# Bare references to the names below keep pyflakes from flagging the imports
# as unused. The names must stay available in this module because extensions
# import them from here.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

114

115

# Implementation modules selected through the module policy.
# NOTE(review): the rust handles are compared against None further down in
# this file, so importrust() presumably returns None when unavailable.
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data (128 KiB)
_maxinline = 131072
# 1 MiB. NOTE(review): consumers are not visible in this part of the file.
_chunksize = 1048576

126

127

# Flag processors for REVIDX_ELLIPSIS.

127

# Flag processors for REVIDX_ELLIPSIS.

128

def ellipsisreadprocessor(rl, text):
    """Read-side flag processor for REVIDX_ELLIPSIS.

    ``rl`` is unused. Returns ``text`` unchanged together with ``False``.

    NOTE(review): the second element is presumably the "validate hash"
    indicator expected by the flag-processor machinery -- confirm against
    flagutil.
    """
    return text, False

130

131

132

def ellipsiswriteprocessor(rl, text):
    """Write-side flag processor for REVIDX_ELLIPSIS.

    ``rl`` is unused. Returns ``text`` unchanged together with ``False``.

    NOTE(review): the second element is presumably the "validate hash"
    indicator expected by the flag-processor machinery -- confirm against
    flagutil.
    """
    return text, False

134

135

136

def ellipsisrawprocessor(rl, text):
    """Raw flag processor for REVIDX_ELLIPSIS.

    Both arguments are unused; the function always returns ``False``.
    """
    return False

138

139

140

# (read, write, raw) processors registered for REVIDX_ELLIPSIS when the
# b'enableellipsis' option is set.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

145

146

147

def offset_type(offset, type):
    """Pack a data offset and revision flags into one integer.

    The offset is shifted up by 16 bits and the flags occupy the low bits,
    so ``value >> 16`` recovers the offset and ``value & 0xFFFF`` the flags.

    Raises ValueError when ``type`` contains bits that are not part of
    ``flagutil.REVIDX_KNOWN_FLAGS``.
    """
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)

151

152

153

def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation.

    When ``skipflags`` is truthy the node is only recorded in
    ``state[b'skipread']``; otherwise ``rl.revision(node)`` is called and its
    result discarded.
    """
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)

161

162

163

# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)

171

172

173

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    NOTE(review): presumably the length of the fulltext -- confirm
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

192

193

194

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """Attribute container declared as implementing
    ``repository.irevisiondelta``.

    Every field is required except ``linknode``, which defaults to None.
    """

    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)

208

209

210

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """Immutable record declared as implementing
    ``repository.iverifyproblem``.

    All three fields default to None.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

216

217

218

def parse_index_v1(data, inline):
    """Parse index ``data`` and return an ``(index, cache)`` pair.

    Delegates to ``parsers.parse_index2`` of whichever ``parsers`` module the
    policy selected.
    """
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache

222

223

224

def parse_index_v2(data, inline):
    """Parse index ``data`` and return an ``(index, cache)`` pair.

    Same as ``parse_index_v1`` except that ``revlogv2=True`` is passed to
    ``parsers.parse_index2``.
    """
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache

228

229

230

def parse_index_cl_v2(data, inline):
    """Parse changelog-v2 index ``data`` and return ``(index, cache)``.

    ``inline`` must be falsy; this is enforced with an assertion.
    """
    assert not inline
    # Unlike the other parse_index_* helpers this one does not go through
    # ``parsers``: the parser is imported from the pure-Python module, and
    # only when the function is called.
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache

237

238

239

# parse_index_v1_nodemap is a real function only when the selected ``parsers``
# module exposes ``parse_index_devel_nodemap``; otherwise the name is None.
if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        """Parse index ``data`` through parsers.parse_index_devel_nodemap."""
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None

248

249

250

def parse_index_v1_mixed(data, inline):
    """Parse with ``parse_index_v1`` and wrap the index in
    ``rustrevlog.MixedIndex``; the cache is returned unchanged.
    """
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache

253

254

255

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

258

259

260

class revlog(object):

260

class revlog(object):

261

"""

261

"""

262

the underlying revision storage object

262

the underlying revision storage object

263

264

A revlog consists of two parts, an index and the revision data.

264

A revlog consists of two parts, an index and the revision data.

265

266

The index is a file with a fixed record size containing

266

The index is a file with a fixed record size containing

267

information on each revision, including its nodeid (hash), the

267

information on each revision, including its nodeid (hash), the

268

nodeids of its parents, the position and offset of its data within

268

nodeids of its parents, the position and offset of its data within

269

the data file, and the revision it's based on. Finally, each entry

269

the data file, and the revision it's based on. Finally, each entry

270

contains a linkrev entry that can serve as a pointer to external

270

contains a linkrev entry that can serve as a pointer to external

271

data.

271

data.

272

273

The revision data itself is a linear collection of data chunks.

273

The revision data itself is a linear collection of data chunks.

274

Each chunk represents a revision and is usually represented as a

274

Each chunk represents a revision and is usually represented as a

275

delta against the previous chunk. To bound lookup time, runs of

275

delta against the previous chunk. To bound lookup time, runs of

276

deltas are limited to about 2 times the length of the original

276

deltas are limited to about 2 times the length of the original

277

version data. This makes retrieval of a version proportional to

277

version data. This makes retrieval of a version proportional to

278

its size, or O(1) relative to the number of revisions.

278

its size, or O(1) relative to the number of revisions.

279

280

Both pieces of the revlog are written to in an append-only

280

Both pieces of the revlog are written to in an append-only

281

fashion, which means we never need to rewrite a file to insert or

281

fashion, which means we never need to rewrite a file to insert or

282

remove data, and can use some simple techniques to avoid the need

282

remove data, and can use some simple techniques to avoid the need

283

for locking while reading.

283

for locking while reading.

284

285

If checkambig, indexfile is opened with checkambig=True at

285

If checkambig, indexfile is opened with checkambig=True at

286

writing, to avoid file stat ambiguity.

286

writing, to avoid file stat ambiguity.

287

288

If mmaplargeindex is True, and an mmapindexthreshold is set, the

288

If mmaplargeindex is True, and an mmapindexthreshold is set, the

289

index will be mmapped rather than read if it is larger than the

289

index will be mmapped rather than read if it is larger than the

290

configured threshold.

290

configured threshold.

291

292

If censorable is True, the revlog can have censored revisions.

292

If censorable is True, the revlog can have censored revisions.

293

294

If `upperboundcomp` is not None, this is the expected maximal gain from

294

If `upperboundcomp` is not None, this is the expected maximal gain from

295

compression for the data content.

295

compression for the data content.

296

297

`concurrencychecker` is an optional function that receives 3 arguments: a

297

`concurrencychecker` is an optional function that receives 3 arguments: a

298

file handle, a filename, and an expected position. It should check whether

298

file handle, a filename, and an expected position. It should check whether

299

the current position in the file handle is valid, and log/warn/fail (by

299

the current position in the file handle is valid, and log/warn/fail (by

300

raising).

300

raising).

301

302

303

Internal details

303

Internal details

304

----------------

304

----------------

305

306

A large part of the revlog logic deals with revisions' "index entries", tuple

306

A large part of the revlog logic deals with revisions' "index entries", tuple

307

objects that contains the same "items" whatever the revlog version.

307

objects that contains the same "items" whatever the revlog version.

308

Different versions will have different ways of storing these items (sometimes

308

Different versions will have different ways of storing these items (sometimes

309

not having them at all), but the tuple will always be the same. New fields

309

not having them at all), but the tuple will always be the same. New fields

310

are usually added at the end to avoid breaking existing code that relies

310

are usually added at the end to avoid breaking existing code that relies

311

on the existing order. The field are defined as follows:

311

on the existing order. The field are defined as follows:

312

313

[0] offset:

313

[0] offset:

314

The byte index of the start of revision data chunk.

314

The byte index of the start of revision data chunk.

315

That value is shifted up by 16 bits. use "offset = field >> 16" to

315

That value is shifted up by 16 bits. use "offset = field >> 16" to

316

retrieve it.

316

retrieve it.

317

318

flags:

318

flags:

319

A flag field that carries special information or changes the behavior

319

A flag field that carries special information or changes the behavior

320

of the revision. (see `REVIDX_*` constants for details)

320

of the revision. (see `REVIDX_*` constants for details)

321

The flag field only occupies the first 16 bits of this field,

321

The flag field only occupies the first 16 bits of this field,

322

use "flags = field & 0xFFFF" to retrieve the value.

322

use "flags = field & 0xFFFF" to retrieve the value.

323

324

[1] compressed length:

324

[1] compressed length:

325

The size, in bytes, of the chunk on disk

325

The size, in bytes, of the chunk on disk

326

327

[2] uncompressed length:

327

[2] uncompressed length:

328

The size, in bytes, of the full revision once reconstructed.

328

The size, in bytes, of the full revision once reconstructed.

329

330

[3] base rev:

330

[3] base rev:

331

Either the base of the revision delta chain (without general

331

Either the base of the revision delta chain (without general

332

delta), or the base of the delta (stored in the data chunk)

332

delta), or the base of the delta (stored in the data chunk)

333

with general delta.

333

with general delta.

334

335

[4] link rev:

335

[4] link rev:

336

Changelog revision number of the changeset introducing this

336

Changelog revision number of the changeset introducing this

337

revision.

337

revision.

338

339

[5] parent 1 rev:

339

[5] parent 1 rev:

340

Revision number of the first parent

340

Revision number of the first parent

341

342

[6] parent 2 rev:

342

[6] parent 2 rev:

343

Revision number of the second parent

343

Revision number of the second parent

344

345

[7] node id:

345

[7] node id:

346

The node id of the current revision

346

The node id of the current revision

347

348

[8] sidedata offset:

348

[8] sidedata offset:

349

The byte index of the start of the revision's side-data chunk.

349

The byte index of the start of the revision's side-data chunk.

350

351

[9] sidedata chunk length:

351

[9] sidedata chunk length:

352

The size, in bytes, of the revision's side-data chunk.

352

The size, in bytes, of the revision's side-data chunk.

353

354

[10] data compression mode:

354

[10] data compression mode:

355

two bits that detail the way the data chunk is compressed on disk.

355

two bits that detail the way the data chunk is compressed on disk.

356

(see "COMP_MODE_*" constants for details). For revlog version 0 and

356

(see "COMP_MODE_*" constants for details). For revlog version 0 and

357

1 this will always be COMP_MODE_INLINE.

357

1 this will always be COMP_MODE_INLINE.

358

359

[11] side-data compression mode:

359

[11] side-data compression mode:

360

two bits that detail the way the sidedata chunk is compressed on disk.

360

two bits that detail the way the sidedata chunk is compressed on disk.

361

(see "COMP_MODE_*" constants for details)

361

(see "COMP_MODE_*" constants for details)

362

"""

362

"""

363

364

_flagserrorclass = error.RevlogError

364

_flagserrorclass = error.RevlogError

365

366

def __init__(
    self,
    opener,
    target,
    radix,
    postfix=None,  # only exist for `tmpcensored` now
    checkambig=False,
    mmaplargeindex=False,
    censorable=False,
    upperboundcomp=None,
    persistentnodemap=False,
    concurrencychecker=None,
    trypending=False,
):
    """
    create a revlog object

    opener is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.

    `target`: a (KIND, ID) tuple that identifies the content stored in
    this revlog. It helps the rest of the code to understand what the
    revlog is about without having to resort to heuristics and index
    filename analysis. Note that this must reliably be set by normal code,
    but test, debug, or performance measurement code might not set it to
    an accurate value.

    The constructor stores its arguments, sets the default value of every
    tunable, and then calls ``_loadindex()``.
    """
    self.upperboundcomp = upperboundcomp

    self.radix = radix

    self._docket_file = None
    self._indexfile = None
    self._datafile = None
    self._nodemap_file = None
    self.postfix = postfix
    self._trypending = trypending
    self.opener = opener
    if persistentnodemap:
        self._nodemap_file = nodemaputil.get_nodemap_file(self)

    assert target[0] in ALL_KINDS
    assert len(target) == 2
    self.target = target
    # When True, indexfile is opened with checkambig=True at writing, to
    # avoid file stat ambiguity.
    self._checkambig = checkambig
    self._mmaplargeindex = mmaplargeindex
    self._censorable = censorable
    # 3-tuple of (node, rev, text) for a raw revision.
    self._revisioncache = None
    # Maps rev to chain base rev.
    self._chainbasecache = util.lrucachedict(100)
    # 2-tuple of (offset, data) of raw data from the revlog at an offset.
    self._chunkcache = (0, b'')
    # How much data to read and cache into the raw revlog data cache.
    self._chunkcachesize = 65536
    self._maxchainlen = None
    self._deltabothparents = True
    self.index = None
    self._docket = None
    self._nodemap_docket = None
    # Mapping of partial identifiers to full nodes.
    self._pcache = {}
    # Compression engine name and its options; _init_opts() may override.
    self._compengine = b'zlib'
    self._compengineopts = {}
    self._maxdeltachainspan = -1
    self._withsparseread = False
    self._sparserevlog = False
    self.hassidedata = False
    self._srdensitythreshold = 0.50
    self._srmingapsize = 262144

    # Make copy of flag processors so each revlog instance can support
    # custom flags.
    self._flagprocessors = dict(flagutil.flagprocessors)

    # 2-tuple of file handles being used for active writing.
    self._writinghandles = None
    # prevent nesting of addgroup
    self._adding_group = None

    self._loadindex()

    self._concurrencychecker = concurrencychecker

452

453

def _init_opts(self):
    """process options (from above/config) to setup associated default revlog mode

    These values might be affected when actually reading on disk information.

    The relevant values are returned for use in _loadindex().

    * new_header:
        version header to use if we need to create a new revlog

    * mmapindexthreshold:
        minimal index size for start to use mmap

    * force_nodemap:
        force the usage of a "development" version of the nodemap code

    Raises error.RevlogError when the configured chunk cache size is not a
    positive power of two.
    """
    mmapindexthreshold = None
    opts = self.opener.options

    # Pick the header for a newly created revlog; the first matching option
    # wins.
    if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
        new_header = CHANGELOGV2
    elif b'revlogv2' in opts:
        new_header = REVLOGV2
    elif b'revlogv1' in opts:
        new_header = REVLOGV1 | FLAG_INLINE_DATA
        if b'generaldelta' in opts:
            new_header |= FLAG_GENERALDELTA
    elif b'revlogv0' in opts:
        new_header = REVLOGV0
    else:
        new_header = REVLOG_DEFAULT_VERSION

    if b'chunkcachesize' in opts:
        self._chunkcachesize = opts[b'chunkcachesize']
    if b'maxchainlen' in opts:
        self._maxchainlen = opts[b'maxchainlen']
    if b'deltabothparents' in opts:
        self._deltabothparents = opts[b'deltabothparents']
    self._lazydelta = bool(opts.get(b'lazydelta', True))
    # lazydeltabase is only honoured when lazydelta itself is enabled
    self._lazydeltabase = False
    if self._lazydelta:
        self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
    if b'compengine' in opts:
        self._compengine = opts[b'compengine']
    if b'zlib.level' in opts:
        self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
    if b'zstd.level' in opts:
        self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
    if b'maxdeltachainspan' in opts:
        self._maxdeltachainspan = opts[b'maxdeltachainspan']
    if self._mmaplargeindex and b'mmapindexthreshold' in opts:
        mmapindexthreshold = opts[b'mmapindexthreshold']
    self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
    withsparseread = bool(opts.get(b'with-sparse-read', False))
    # sparse-revlog forces sparse-read
    self._withsparseread = self._sparserevlog or withsparseread
    if b'sparse-read-density-threshold' in opts:
        self._srdensitythreshold = opts[b'sparse-read-density-threshold']
    if b'sparse-read-min-gap-size' in opts:
        self._srmingapsize = opts[b'sparse-read-min-gap-size']
    if opts.get(b'enableellipsis'):
        self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

    # revlog v0 doesn't have flag processors
    for flag, processor in pycompat.iteritems(
        opts.get(b'flagprocessors', {})
    ):
        flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

    if self._chunkcachesize <= 0:
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not greater than 0')
            % self._chunkcachesize
        )
    elif self._chunkcachesize & (self._chunkcachesize - 1):
        # x & (x - 1) is non-zero exactly when x has more than one bit set
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not a power of 2')
            % self._chunkcachesize
        )
    force_nodemap = opts.get(b'devel-force-nodemap', False)
    return new_header, mmapindexthreshold, force_nodemap

534

535

def _get_data(self, filepath, mmap_threshold, size=None):

535

def _get_data(self, filepath, mmap_threshold, size=None):

536

"""return a file content with or without mmap

536

"""return a file content with or without mmap

537

538

If the file is missing return the empty string"""

538

If the file is missing return the empty string"""

539

try:

539

try:

540

with self.opener(filepath) as fp:

540

with self.opener(filepath) as fp:

541

if mmap_threshold is not None:

541

if mmap_threshold is not None:

542

file_size = self.opener.fstat(fp).st_size

542

file_size = self.opener.fstat(fp).st_size

543

if file_size >= mmap_threshold:

543

if file_size >= mmap_threshold:

544

if size is not None:

544

if size is not None:

545

# avoid potentiel mmap crash

545

# avoid potentiel mmap crash

546

size = min(file_size, size)

546

size = min(file_size, size)

547

# TODO: should .close() to release resources without

547

# TODO: should .close() to release resources without

548

# relying on Python GC

548

# relying on Python GC

549

if size is None:

549

if size is None:

550

return util.buffer(util.mmapread(fp))

550

return util.buffer(util.mmapread(fp))

551

else:

551

else:

552

return util.buffer(util.mmapread(fp, size))

552

return util.buffer(util.mmapread(fp, size))

553

if size is None:

553

if size is None:

554

return fp.read()

554

return fp.read()

555

else:

555

else:

556

return fp.read(size)

556

return fp.read(size)

557

except IOError as inst:

557

except IOError as inst:

558

if inst.errno != errno.ENOENT:

558

if inst.errno != errno.ENOENT:

559

raise

559

raise

560

return b''

560

return b''

561

562

def _loadindex(self):
    """Read the revlog entry point and set up the in-memory index.

    Picks the entry-point file name, reads it, decodes and validates the
    header (version and flags), resolves the index/data file names (going
    through the docket when the format uses one), selects the index parser
    and finally populates ``self.index`` and the related caches.

    Raises ``error.RevlogError`` on an unknown version, unknown flags, a
    truncated index, or when the parser reports a corrupted index.
    """

    new_header, mmapindexthreshold, force_nodemap = self._init_opts()

    # Pick the file to read first: explicit postfix, then the pending
    # ("<radix>.i.a") file when allowed and present, then the plain index.
    if self.postfix is not None:
        entry_point = b'%s.i.%s' % (self.radix, self.postfix)
    elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
        entry_point = b'%s.i.a' % self.radix
    else:
        entry_point = b'%s.i' % self.radix

    entry_data = b''
    self._initempty = True
    entry_data = self._get_data(entry_point, mmapindexthreshold)
    if len(entry_data) > 0:
        # the header is decoded from the first 4 bytes of the entry point
        header = INDEX_HEADER.unpack(entry_data[:4])[0]
        self._initempty = False
    else:
        # nothing on disk: use the header computed from the options
        header = new_header

    # low 16 bits: format version; remaining high bits: format flags
    self._format_flags = header & ~0xFFFF
    self._format_version = header & 0xFFFF

    supported_flags = SUPPORTED_FLAGS.get(self._format_version)
    if supported_flags is None:
        msg = _(b'unknown version (%d) in revlog %s')
        msg %= (self._format_version, self.display_id)
        raise error.RevlogError(msg)
    elif self._format_flags & ~supported_flags:
        msg = _(b'unknown flags (%#04x) in version %d revlog %s')
        display_flag = self._format_flags >> 16
        msg %= (display_flag, self._format_version, self.display_id)
        raise error.RevlogError(msg)

    features = FEATURES_BY_VERSION[self._format_version]
    self._inline = features[b'inline'](self._format_flags)
    self._generaldelta = features[b'generaldelta'](self._format_flags)
    self.hassidedata = features[b'sidedata']

    if not features[b'docket']:
        # the entry point is the index itself
        self._indexfile = entry_point
        index_data = entry_data
    else:
        # the entry point is a docket that names the real index file
        self._docket_file = entry_point
        if self._initempty:
            self._docket = docketutil.default_docket(self, header)
        else:
            self._docket = docketutil.parse_docket(
                self, entry_data, use_pending=self._trypending
            )
        self._indexfile = self._docket.index_filepath()
        index_data = b''
        index_size = self._docket.index_end
        if index_size > 0:
            # read no more than the size recorded in the docket
            index_data = self._get_data(
                self._indexfile, mmapindexthreshold, size=index_size
            )
            if len(index_data) < index_size:
                msg = _(b'too few index data for %s: got %d, expected %d')
                msg %= (self.display_id, len(index_data), index_size)
                raise error.RevlogError(msg)

        self._inline = False
        # generaldelta implied by version 2 revlogs.
        self._generaldelta = True
        # the logic for persistent nodemap will be dealt with within the
        # main docket, so disable it for now.
        self._nodemap_file = None

    if self.postfix is None:
        self._datafile = b'%s.d' % self.radix
    else:
        self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

    self.nodeconstants = sha1nodeconstants
    self.nullid = self.nodeconstants.nullid

    # sparse-revlog can't be on without general-delta (issue6056)
    if not self._generaldelta:
        self._sparserevlog = False

    self._storedeltachains = True

    devel_nodemap = (
        self._nodemap_file
        and force_nodemap
        and parse_index_v1_nodemap is not None
    )

    use_rust_index = False
    if rustrevlog is not None:
        if self._nodemap_file is not None:
            use_rust_index = True
        else:
            use_rust_index = self.opener.options.get(b'rust.index')

    # Select the parser: the format version takes precedence, then the
    # devel nodemap parser, then the rust-backed one.
    self._parse_index = parse_index_v1
    if self._format_version == REVLOGV0:
        self._parse_index = revlogv0.parse_index_v0
    elif self._format_version == REVLOGV2:
        self._parse_index = parse_index_v2
    elif self._format_version == CHANGELOGV2:
        self._parse_index = parse_index_cl_v2
    elif devel_nodemap:
        self._parse_index = parse_index_v1_nodemap
    elif use_rust_index:
        self._parse_index = parse_index_v1_mixed
    try:
        d = self._parse_index(index_data, self._inline)
        index, _chunkcache = d
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                docket = nodemap_data[0]
                # only trust the persisted nodemap when its recorded tip
                # matches what the freshly parsed index contains
                if (
                    len(d[0]) > docket.tip_rev
                    and d[0][docket.tip_rev][7] == docket.tip_node
                ):
                    # no changelog tampering
                    self._nodemap_docket = docket
                    index.update_nodemap_data(*nodemap_data)
    except (ValueError, IndexError):
        raise error.RevlogError(
            _(b"index %s is corrupted") % self.display_id
        )
    self.index, self._chunkcache = d
    if not self._chunkcache:
        self._chunkclear()
    # revnum -> (chain-length, sum-delta-length)
    self._chaininfocache = util.lrucachedict(500)
    # revlog header -> revlog compressor
    self._decompressors = {}

699

700

@util.propertycache
def revlog_kind(self):
    """First element of ``self.target``."""
    kind = self.target[0]
    return kind

703

704

@util.propertycache
def display_id(self):
    """The public facing "ID" of the revlog that we use in message"""
    # Maybe we should build a user facing representation of
    # revlog.target instead of using `self.radix`
    public_id = self.radix
    return public_id

710

711

def _get_decompressor(self, t):

711

def _get_decompressor(self, t):

712

try:

712

try:

713

compressor = self._decompressors[t]

713

compressor = self._decompressors[t]

714

except KeyError:

714

except KeyError:

715

try:

715

try:

716

engine = util.compengines.forrevlogheader(t)

716

engine = util.compengines.forrevlogheader(t)

717

compressor = engine.revlogcompressor(self._compengineopts)

717

compressor = engine.revlogcompressor(self._compengineopts)

718

self._decompressors[t] = compressor

718

self._decompressors[t] = compressor

719

except KeyError:

719

except KeyError:

720

raise error.RevlogError(

720

raise error.RevlogError(

721

_(b'unknown compression type %s') % binascii.hexlify(t)

721

_(b'unknown compression type %s') % binascii.hexlify(t)

722

)

722

)

723

return compressor

723

return compressor

724

725

@util.propertycache
def _compressor(self):
    """Compressor built from the engine named by ``self._compengine``."""
    return util.compengines[self._compengine].revlogcompressor(
        self._compengineopts
    )

729

730

@util.propertycache
def _decompressor(self):
    """the default decompressor"""
    docket = self._docket
    if docket is None:
        # no docket, hence no default compression header to look up
        return None
    header = docket.default_compression_header
    return self._get_decompressor(header).decompress

738

739

def _indexfp(self):

739

def _indexfp(self):

740

"""file object for the revlog's index file"""

740

"""file object for the revlog's index file"""

741

return self.opener(self._indexfile, mode=b"r")

741

return self.opener(self._indexfile, mode=b"r")

742

743

def __index_write_fp(self):

743

def __index_write_fp(self):

744

# You should not use this directly and use `_writing` instead

744

# You should not use this directly and use `_writing` instead

745

try:

745

try:

746

f = self.opener(

746

f = self.opener(

747

self._indexfile, mode=b"r+", checkambig=self._checkambig

747

self._indexfile, mode=b"r+", checkambig=self._checkambig

748

)

748

)

749

if self._docket is None:

749

if self._docket is None:

750

f.seek(0, os.SEEK_END)

750

f.seek(0, os.SEEK_END)

751

else:

751

else:

752

f.seek(self._docket.index_end, os.SEEK_SET)

752

f.seek(self._docket.index_end, os.SEEK_SET)

753

return f

753

return f

754

except IOError as inst:

754

except IOError as inst:

755

if inst.errno != errno.ENOENT:

755

if inst.errno != errno.ENOENT:

756

raise

756

raise

757

return self.opener(

757

return self.opener(

758

self._indexfile, mode=b"w+", checkambig=self._checkambig

758

self._indexfile, mode=b"w+", checkambig=self._checkambig

759

)

759

)

760

761

def __index_new_fp(self):

761

def __index_new_fp(self):

762

# You should not use this unless you are upgrading from inline revlog

762

# You should not use this unless you are upgrading from inline revlog

763

return self.opener(

763

return self.opener(

764

self._indexfile,

764

self._indexfile,

765

mode=b"w",

765

mode=b"w",

766

checkambig=self._checkambig,

766

checkambig=self._checkambig,

767

atomictemp=True,

767

atomictemp=True,

768

)

768

)

769

770

def _datafp(self, mode=b'r'):

770

def _datafp(self, mode=b'r'):

771

"""file object for the revlog's data file"""

771

"""file object for the revlog's data file"""

772

return self.opener(self._datafile, mode=mode)

772

return self.opener(self._datafile, mode=mode)

773

774

@contextlib.contextmanager

774

@contextlib.contextmanager

775

def _datareadfp(self, existingfp=None):

775

def _datareadfp(self, existingfp=None):

776

"""file object suitable to read data"""

776

"""file object suitable to read data"""

777

# Use explicit file handle, if given.

777

# Use explicit file handle, if given.

778

if existingfp is not None:

778

if existingfp is not None:

779

yield existingfp

779

yield existingfp

780

781

# Use a file handle being actively used for writes, if available.

781

# Use a file handle being actively used for writes, if available.

782

# There is some danger to doing this because reads will seek the

782

# There is some danger to doing this because reads will seek the

783

# file. However, _writeentry() performs a SEEK_END before all writes,

783

# file. However, _writeentry() performs a SEEK_END before all writes,

784

# so we should be safe.

784

# so we should be safe.

785

elif self._writinghandles:

785

elif self._writinghandles:

786

if self._inline:

786

if self._inline:

787

yield self._writinghandles[0]

787

yield self._writinghandles[0]

788

else:

788

else:

789

yield self._writinghandles[1]

789

yield self._writinghandles[1]

790

791

# Otherwise open a new file handle.

791

# Otherwise open a new file handle.

792

else:

792

else:

793

if self._inline:

793

if self._inline:

794

func = self._indexfp

794

func = self._indexfp

795

else:

795

else:

796

func = self._datafp

796

func = self._datafp

797

with func() as fp:

797

with func() as fp:

798

yield fp

798

yield fp

799

800

def tiprev(self):
    """Revision number of the last index entry (-1 for an empty index)."""
    count = len(self.index)
    return count - 1

802

803

def tip(self):
    """Node of the revision returned by ``tiprev()``."""
    last = self.tiprev()
    return self.node(last)

805

806

def __contains__(self, rev):

806

def __contains__(self, rev):

807

return 0 <= rev < len(self)

807

return 0 <= rev < len(self)

808

809

def __len__(self):

809

def __len__(self):

810

return len(self.index)

810

return len(self.index)

811

812

def __iter__(self):
    """Iterate over the revision numbers ``0 .. len(self) - 1``."""
    count = len(self)
    return iter(pycompat.xrange(count))

814

815

def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)"""
    total = len(self)
    return storageutil.iterrevs(total, start=start, stop=stop)

818

819

@property
def nodemap(self):
    """Deprecated accessor: warns, then returns ``self.index.nodemap``."""
    util.nouideprecwarn(
        b"revlog.nodemap is deprecated, "
        b"use revlog.index.[has_node|rev|get_rev]",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap

827

828

@property
def _nodecache(self):
    """Deprecated accessor: warns, then returns ``self.index.nodemap``."""
    util.nouideprecwarn(
        b"revlog._nodecache is deprecated, use revlog.index.nodemap",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap

833

834

def hasnode(self, node):
    """Return True when ``self.rev(node)`` succeeds, False on ``KeyError``."""
    try:
        self.rev(node)
    except KeyError:
        return False
    return True

840

841

def candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # Disable delta if either rev requires a content-changing flag
    # processor (ex. LFS). This is because such flag processor can alter
    # the rawtext content that the delta will be based on, and two clients
    # could have a same revlog node with different flags (i.e. different
    # rawtext contents) and the delta could be incompatible.
    if self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
        return False
    if self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
        return False
    return True

853

854

def update_caches(self, transaction):
    """Refresh the persistent nodemap, when this revlog has one.

    Without a transaction the nodemap is updated directly; otherwise the
    update is set up on ``transaction``.
    """
    if self._nodemap_file is None:
        return
    if transaction is None:
        nodemaputil.update_persistent_nodemap(self)
    else:
        nodemaputil.setup_persistent_nodemap(transaction, self)

860

861

def clearcaches(self):
    """Reset the in-memory caches and reload persisted nodemap data."""
    self._revisioncache = None
    self._chainbasecache.clear()
    self._chunkcache = (0, b'')
    self._pcache = {}
    self._nodemap_docket = None
    self.index.clearcaches()
    # The python code is the one responsible for validating the docket, we
    # end up having to refresh it here.
    if self._inline:
        return
    if self._nodemap_file is None:
        return
    if not util.safehasattr(self.index, 'update_nodemap_data'):
        return
    persisted = nodemaputil.persisted_data(self)
    if persisted is not None:
        self._nodemap_docket = persisted[0]
        self.index.update_nodemap_data(*persisted)

880

881

def rev(self, node):
    """Return the revision number of ``node``, looked up in the index.

    When the index lookup raises ``error.RevlogError``, raises
    ``error.WdirUnsupported`` for working-directory ids and
    ``error.LookupError`` otherwise. ``TypeError`` propagates unchanged.
    """
    try:
        return self.index.rev(node)
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        consts = self.nodeconstants
        is_wdir = node == consts.wdirid or node in consts.wdirfilenodeids
        if is_wdir:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.display_id, _(b'no node'))

894

895

# Accessors for index entries.

895

# Accessors for index entries.

896

897

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

897

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

898

# are flags.

898

# are flags.

899

def start(self, rev):
    """First field of the index entry for ``rev``, shifted right 16 bits."""
    packed = self.index[rev][0]
    return int(packed >> 16)

901

902

def flags(self, rev):
    """Low 16 bits of the first field of the index entry for ``rev``."""
    packed = self.index[rev][0]
    return packed & 0xFFFF

904

905

def length(self, rev):
    """Second field (position 1) of the index entry for ``rev``."""
    entry = self.index[rev]
    return entry[1]

907

908

def sidedata_length(self, rev):
    """Field 9 of the index entry for ``rev``, or 0 without sidedata."""
    if self.hassidedata:
        return self.index[rev][9]
    return 0

912

913

def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    recorded = self.index[rev][2]
    if recorded < 0:
        # no usable size recorded in the index: measure the raw data
        return len(self.rawdata(rev))
    return recorded

921

922

def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    flags = self.flags(rev)
    content_changing = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
    if flags & content_changing:
        return len(self.revision(rev, raw=False))
    # fast path: if no "read" flag processor could change the content,
    # size is rawsize. note: ELLIPSIS is known to not change the content.
    return self.rawsize(rev)

931

932

def chainbase(self, rev):
    """Follow field 3 of the index entries from ``rev`` to a fixed point.

    The result is memoized in ``self._chainbasecache``.
    """
    known = self._chainbasecache.get(rev)
    if known is not None:
        return known

    index = self.index
    current = rev
    while True:
        base = index[current][3]
        if base == current:
            # an entry that is its own base ends the chain
            break
        current = base

    self._chainbasecache[rev] = base
    return base

946

947

def linkrev(self, rev):
    """Field 4 of the index entry for ``rev``."""
    entry = self.index[rev]
    return entry[4]

949

950

def parentrevs(self, rev):
    """Return the two parent revisions of ``rev`` (index fields 5 and 6).

    When field 5 is ``nullrev`` the pair is returned swapped. Raises
    ``error.WdirUnsupported`` when ``rev`` is ``wdirrev`` and not in the
    index; any other ``IndexError`` propagates.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev != wdirrev:
            raise
        raise error.WdirUnsupported
    p1, p2 = entry[5], entry[6]
    if p1 == nullrev:
        return p2, p1
    return p1, p2

# fast parentrevs(rev) where rev isn't filtered
_uncheckedparentrevs = parentrevs

964

965

def node(self, rev):
    """Return field 7 of the index entry for ``rev``.

    Raises ``error.WdirUnsupported`` when ``rev`` is ``wdirrev`` and not in
    the index; any other ``IndexError`` propagates.
    """
    try:
        return self.index[rev][7]
    except IndexError:
        if rev != wdirrev:
            raise
        raise error.WdirUnsupported

972

973

# Derived from index values.

973

# Derived from index values.

974

975

def end(self, rev):
    """Sum of ``start(rev)`` and ``length(rev)``."""
    begin = self.start(rev)
    return begin + self.length(rev)

977

978

def parents(self, node):
    """Return the nodes of the two parents of ``node``.

    Index fields 5 and 6 hold the parent revision numbers; field 7 of an
    entry holds its node. When the first parent is the null node the pair
    is returned swapped, so a non-null parent, if any, comes first. This
    matches the ordering ``parentrevs`` applies to revision numbers.
    """
    i = self.index
    d = i[self.rev(node)]
    # inline node() to avoid function call overhead
    p1 = i[d[5]][7]
    p2 = i[d[6]][7]
    # The previous code compared d[5], a revision number, with
    # ``self.nullid``, a node id, so the swap never happened. Compare
    # the parent's node instead.
    if p1 == self.nullid:
        return p2, p1
    return p1, p2

986

987

def chainlen(self, rev):
    """First element of ``self._chaininfo(rev)``."""
    info = self._chaininfo(rev)
    return info[0]

989

990

def _chaininfo(self, rev):

990

def _chaininfo(self, rev):

991

chaininfocache = self._chaininfocache

991

chaininfocache = self._chaininfocache

992

if rev in chaininfocache:

992

if rev in chaininfocache:

993

return chaininfocache[rev]

993

return chaininfocache[rev]

994

index = self.index

994

index = self.index

995

generaldelta = self._generaldelta

995

generaldelta = self._generaldelta

996

iterrev = rev

996

iterrev = rev

997

e = index[iterrev]

997

e = index[iterrev]

998

clen = 0

998

clen = 0

999

compresseddeltalen = 0

999

compresseddeltalen = 0

1000

while iterrev != e[3]:

1000

while iterrev != e[3]:

1001

clen += 1

1001

clen += 1

1002

compresseddeltalen += e[1]

1002

compresseddeltalen += e[1]

1003

if generaldelta:

1003

if generaldelta:

1004

iterrev = e[3]

1004

iterrev = e[3]

1005

else:

1005

else:

1006

iterrev -= 1

1006

iterrev -= 1

1007

if iterrev in chaininfocache:

1007

if iterrev in chaininfocache:

1008

t = chaininfocache[iterrev]

1008

t = chaininfocache[iterrev]

1009

clen += t[0]

1009

clen += t[0]

1010

compresseddeltalen += t[1]

1010

compresseddeltalen += t[1]

1011

break

1011

break

1012

e = index[iterrev]

1012

e = index[iterrev]

1013

else:

1013

else:

1014

# Add text length of base since decompressing that also takes

1014

# Add text length of base since decompressing that also takes

1015

# work. For cache hits the length is already included.

1015

# work. For cache hits the length is already included.

1016

compresseddeltalen += e[1]

1016

compresseddeltalen += e[1]

1017

r = (clen, compresseddeltalen)

1017

r = (clen, compresseddeltalen)

1018

chaininfocache[rev] = r

1018

chaininfocache[rev] = r

1019

return r

1019

return r

1020

1021

def _deltachain(self, rev, stoprev=None):

1021

def _deltachain(self, rev, stoprev=None):

1022

"""Obtain the delta chain for a revision.

1022

"""Obtain the delta chain for a revision.

1023

1024

``stoprev`` specifies a revision to stop at. If not specified, we

1024

``stoprev`` specifies a revision to stop at. If not specified, we

1025

stop at the base of the chain.

1025

stop at the base of the chain.

1026

1027

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

1027

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

1028

revs in ascending order and ``stopped`` is a bool indicating whether

1028

revs in ascending order and ``stopped`` is a bool indicating whether

1029

``stoprev`` was hit.

1029

``stoprev`` was hit.

1030

"""

1030

"""

1031

# Try C implementation.

1031

# Try C implementation.

1032

try:

1032

try:

1033

return self.index.deltachain(rev, stoprev, self._generaldelta)

1033

return self.index.deltachain(rev, stoprev, self._generaldelta)

1034

except AttributeError:

1034

except AttributeError:

1035

pass

1035

pass

1036

1037

chain = []

1037

chain = []

1038

1039

# Alias to prevent attribute lookup in tight loop.

1039

# Alias to prevent attribute lookup in tight loop.

1040

index = self.index

1040

index = self.index

1041

generaldelta = self._generaldelta

1041

generaldelta = self._generaldelta

1042

1043

iterrev = rev

1043

iterrev = rev

1044

e = index[iterrev]

1044

e = index[iterrev]

1045

while iterrev != e[3] and iterrev != stoprev:

1045

while iterrev != e[3] and iterrev != stoprev:

1046

chain.append(iterrev)

1046

chain.append(iterrev)

1047

if generaldelta:

1047

if generaldelta:

1048

iterrev = e[3]

1048

iterrev = e[3]

1049

else:

1049

else:

1050

iterrev -= 1

1050

iterrev -= 1

1051

e = index[iterrev]

1051

e = index[iterrev]

1052

1053

if iterrev == stoprev:

1053

if iterrev == stoprev:

1054

stopped = True

1054

stopped = True

1055

else:

1055

else:

1056

chain.append(iterrev)

1056

chain.append(iterrev)

1057

stopped = False

1057

stopped = False

1058

1059

chain.reverse()

1059

chain.reverse()

1060

return chain, stopped

1060

return chain, stopped

1061

1062

def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse revision order.

    Does not generate revs lower than stoprev.

    ``revs`` is an iterable of revision numbers; it is materialized into a
    list before use. ``stoprev`` and ``inclusive`` are forwarded unchanged
    to the underlying lazy-ancestors implementation.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered
    # (self.node() is called only for its validation side effect)
    revs = list(revs)
    checkrev = self.node
    for r in revs:
        checkrev(r)
    # and we're sure ancestors aren't filtered as well

    # Prefer the Rust implementation when it is available and the index
    # declares itself compatible; it walks the index object directly,
    # whereas the Python one is driven by a parent-lookup callable.
    if rustancestor is not None and self.index.rust_ext_compat:
        lazyancestors = rustancestor.LazyAncestors
        arg = self.index
    else:
        lazyancestors = ancestor.lazyancestors
        arg = self._uncheckedparentrevs
    return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

1082

1083

def descendants(self, revs):
    """Return the descendants of ``revs`` as computed by dagop.

    Thin wrapper: delegates to ``dagop.descendantrevs`` with this revlog's
    ``revs`` iterator factory and ``parentrevs`` lookup.
    """
    return dagop.descendantrevs(revs, self.revs, self.parentrevs)

1085

1086

def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The first element is a lazy set-like object of revision numbers
    (supporting ``in``, iteration, ``add`` and ``update``); the second is
    a list of node IDs sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    # From here on we work with revision numbers rather than node IDs.
    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        """Union of an eagerly built set and a lazily evaluated container."""

        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            # Check the cheap explicit set first so the lazy container is
            # only consulted when needed.
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                # Skip values already yielded from the explicit set.
                if r not in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    # self.ancestors() is called with its default (non-inclusive) mode,
    # so the common revs themselves (and nullrev) are added explicitly.
    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in self.parentrevs(r):
            if p not in has:
                visit.append(p)
    # Sorting by revision number yields the order promised above.
    return has, [self.node(miss) for miss in sorted(missing)]

1149

1150

def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object (or the Rust ``MissingAncestors`` equivalent when the Rust
    extension is available and the index is compatible with it).

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]

    if rustancestor is not None and self.index.rust_ext_compat:
        return rustancestor.MissingAncestors(self.index, common)
    return ancestor.incrementalmissingancestors(self.parentrevs, common)

1165

1166

def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers.  If heads is
    not supplied, uses all of the revlog's head revisions
    (``self.headrevs()``).  If common is not supplied, uses nullrev."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    # The actual computation is delegated to the incremental helper.
    inc = self.incrementalmissingrevs(common=common)
    return inc.missingancestors(heads)

1189

1190

def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    # Translate node IDs to revision numbers for the computation...
    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # ...and translate the resulting revision numbers back to node IDs.
    inc = self.incrementalmissingrevs(common=common)
    return [self.node(r) for r in inc.missingancestors(heads)]

1215

1216

def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads.

    If 'roots' or 'heads' is given but empty, or nothing connects them,
    a tuple of three empty lists is returned."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min([self.rev(n) for n in roots])
    else:
        roots = [self.nullid]  # Everybody's a descendant of nullid
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return (
            [self.node(r) for r in self],
            [self.nullid],
            list(self.heads()),
        )
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # Set ancestors to None to signal that every node is an ancestor.
        ancestors = None
        # Set heads to an empty dictionary for later discovery of heads
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == self.nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendant of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors.add(n)  # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.update(
                        [p for p in self.parents(n) if p != self.nullid]
                    )
                elif n in heads:  # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots?  Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [self.nullid]
    # Transform our roots list into a set.
    descendants = set(roots)
    # Also, keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:  # Everybody is a descendant of nullid
            isdescendant = True
        elif n in descendants:
            # n is already a descendant
            isdescendant = True
            # This check only needs to be done here because all the roots
            # will start being marked as descendants before the loop.
            if n in roots:
                # If n was a root, check if it's a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendants, it's not a root.
                if (p[0] in descendants) or (p[1] in descendants):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendant if either of its parents are
            # descendants.  (We seeded the descendants set with the roots
            # up there, remember?)
            if (p[0] in descendants) or (p[1] in descendants):
                descendants.add(n)
                isdescendant = True
        if isdescendant and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendants and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots.
                # Mark this head as having been reached
                heads[n] = True
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = True
                # But, obviously its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    # Keep only the heads that were actually reached / discovered.
    heads = [head for head, flag in pycompat.iteritems(heads) if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)

1375

1376

def headrevs(self, revs=None):
    """Return the head revision numbers of this revlog.

    With ``revs`` left as None, ask the index for its heads and fall back
    to the pure-Python ``self._headrevs()`` when the index has no
    ``headrevs`` method. Otherwise compute the heads restricted to
    ``revs``, using the Rust implementation when it is available and the
    index is compatible with it, and ``dagop.headrevs`` if not.
    """
    if revs is None:
        try:
            return self.index.headrevs()
        except AttributeError:
            # The index implementation does not provide headrevs().
            return self._headrevs()
    if rustdagop is not None and self.index.rust_ext_compat:
        return rustdagop.headrevs(self.index, revs)
    return dagop.headrevs(revs, self._uncheckedparentrevs)

1385

1386

def computephases(self, roots):
    """Delegate phase computation to the index.

    Returns whatever ``self.index.computephasesmapsets(roots)`` returns;
    ``roots`` is passed through unchanged.
    """
    return self.index.computephasesmapsets(roots)

1388

1389

def _headrevs(self):

1389

def _headrevs(self):

1390

count = len(self)

1390

count = len(self)

1391

if not count:

1391

if not count:

1392

return [nullrev]

1392

return [nullrev]

1393

# we won't iter over filtered rev so nobody is a head at start

1393

# we won't iter over filtered rev so nobody is a head at start

1394

ishead = [0] * (count + 1)

1394

ishead = [0] * (count + 1)

1395

index = self.index

1395

index = self.index

1396

for r in self:

1396

for r in self:

1397

ishead[r] = 1 # I may be an head

1397

ishead[r] = 1 # I may be an head

1398

e = index[r]

1398

e = index[r]

1399

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

1399

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

1400

return [r for r, val in enumerate(ishead) if val]

1400

return [r for r, val in enumerate(ishead) if val]

1401

1402

def heads(self, start=None, stop=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned
    if stop is specified, it will consider all the revs from stop
    as if they had no children

    ``start`` is a node ID and ``stop`` an iterable of node IDs; both are
    converted to revision numbers with ``self.rev``. An empty revlog
    queried without ``start``/``stop`` yields ``[self.nullid]``.
    """
    if start is None and stop is None:
        # Fast path: no restriction, use the precomputed head revisions.
        if not len(self):
            return [self.nullid]
        return [self.node(r) for r in self.headrevs()]

    if start is None:
        start = nullrev
    else:
        start = self.rev(start)

    stoprevs = {self.rev(n) for n in stop or []}

    revs = dagop.headrevssubset(
        self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
    )

    return [self.node(rev) for rev in revs]

1427

1428

def children(self, node):
    """find the children of a given node

    Scans every revision after ``node`` and returns the node IDs of those
    that list it as a parent. When ``node`` resolves to nullrev, revisions
    without any non-null parent are reported as its children.
    """
    c = []
    p = self.rev(node)
    for r in self.revs(start=p + 1):
        prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
        if prevs:
            for pr in prevs:
                if pr == p:
                    c.append(self.node(r))
        elif p == nullrev:
            # r has no real parent, so it is a child of the null revision.
            c.append(self.node(r))
    return c

1441

1442

def commonancestorsheads(self, a, b):
    """calculate all the heads of the common ancestors of nodes a and b

    ``a`` and ``b`` are node IDs; the result is a list of node IDs.
    """
    a, b = self.rev(a), self.rev(b)
    ancs = self._commonancestorsheads(a, b)
    return pycompat.maplist(self.node, ancs)

1447

1448

def _commonancestorsheads(self, *revs):

1448

def _commonancestorsheads(self, *revs):

1449

"""calculate all the heads of the common ancestors of revs"""

1449

"""calculate all the heads of the common ancestors of revs"""

1450

try:

1450

try:

1451

ancs = self.index.commonancestorsheads(*revs)

1451

ancs = self.index.commonancestorsheads(*revs)

1452

except (AttributeError, OverflowError): # C implementation failed

1452

except (AttributeError, OverflowError): # C implementation failed

1453

ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)

1453

ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)

1454

return ancs

1454

return ancs

1455

1456

def isancestor(self, a, b):
    """return True if node a is an ancestor of node b

    A revision is considered an ancestor of itself.

    Both arguments are node IDs; they are converted to revision numbers
    and the question is answered by ``isancestorrev``."""
    a, b = self.rev(a), self.rev(b)
    return self.isancestorrev(a, b)

1462

1463

def isancestorrev(self, a, b):
    """return True if revision a is an ancestor of revision b

    A revision is considered an ancestor of itself.

    The implementation of this is trivial but the use of
    reachableroots is not."""
    if a == nullrev:
        # The null revision is treated as an ancestor of everything.
        return True
    elif a == b:
        return True
    elif a > b:
        # A higher revision number is never reported as an ancestor of a
        # lower one.
        return False
    return bool(self.reachableroots(a, [b], [a], includepath=False))

1477

1478

def reachableroots(self, minroot, heads, roots, includepath=False):
    """return (heads(::(<roots> and <roots>::<heads>)))

    If includepath is True, return (<roots>::<heads>).

    Uses the index's ``reachableroots2`` when it exists and otherwise
    falls back to ``dagop._reachablerootspure``. Note the two take
    ``heads`` and ``roots`` in opposite order."""
    try:
        return self.index.reachableroots2(
            minroot, heads, roots, includepath
        )
    except AttributeError:
        return dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )

1490

1491

def ancestor(self, a, b):
    """calculate the "best" common ancestor of nodes a and b

    ``a`` and ``b`` are node IDs. Returns a node ID, or ``self.nullid``
    when no common ancestor is found."""

    a, b = self.rev(a), self.rev(b)
    try:
        ancs = self.index.ancestors(a, b)
    except (AttributeError, OverflowError):
        # Index implementation unavailable or failed: use pure Python.
        ancs = ancestor.ancestors(self.parentrevs, a, b)
    if ancs:
        # choose a consistent winner when there's a tie
        return min(map(self.node, ancs))
    return self.nullid

1503

1504

def _match(self, id):

1504

def _match(self, id):

1505

if isinstance(id, int):

1505

if isinstance(id, int):

1506

# rev

1506

# rev

1507

return self.node(id)

1507

return self.node(id)

1508

if len(id) == self.nodeconstants.nodelen:

1508

if len(id) == self.nodeconstants.nodelen:

1509

# possibly a binary node

1509

# possibly a binary node

1510

# odds of a binary node being all hex in ASCII are 1 in 10**25

1510

# odds of a binary node being all hex in ASCII are 1 in 10**25

1511

try:

1511

try:

1512

node = id

1512

node = id

1513

self.rev(node) # quick search the index

1513

self.rev(node) # quick search the index

1514

return node

1514

return node

1515

except error.LookupError:

1515

except error.LookupError:

1516

pass # may be partial hex id

1516

pass # may be partial hex id

1517

try:

1517

try:

1518

# str(rev)

1518

# str(rev)

1519

rev = int(id)

1519

rev = int(id)

1520

if b"%d" % rev != id:

1520

if b"%d" % rev != id:

1521

raise ValueError

1521

raise ValueError

1522

if rev < 0:

1522

if rev < 0:

1523

rev = len(self) + rev

1523

rev = len(self) + rev

1524

if rev < 0 or rev >= len(self):

1524

if rev < 0 or rev >= len(self):

1525

raise ValueError

1525

raise ValueError

1526

return self.node(rev)

1526

return self.node(rev)

1527

except (ValueError, OverflowError):

1527

except (ValueError, OverflowError):

1528

pass

1528

pass

1529

if len(id) == 2 * self.nodeconstants.nodelen:

1529

if len(id) == 2 * self.nodeconstants.nodelen:

1530

try:

1530

try:

1531

# a full hex nodeid?

1531

# a full hex nodeid?

1532

node = bin(id)

1532

node = bin(id)

1533

self.rev(node)

1533

self.rev(node)

1534

return node

1534

return node

1535

except (TypeError, error.LookupError):

1535

except (TypeError, error.LookupError):

1536

pass

1536

pass

1537

1538

def _partialmatch(self, id):
    """Resolve a hex node prefix ``id`` to a node.

    The index's ``partialmatch`` is tried first; when it is unavailable,
    rejects the key, or cannot be trusted because revisions are filtered,
    a scan over the whole index is used instead.

    Returns the matching node, or None when nothing matches (None is also
    the implicit result when ``id`` is longer than 40 characters or
    converting the prefix raises TypeError).

    Raises ``error.AmbiguousPrefixLookupError`` when the prefix matches
    more than one candidate (a prefix of the wdir hex id counts as a
    candidate), and ``error.WdirUnsupported`` when the prefix can only
    designate the working directory pseudo-node.
    """
    # we don't care wdirfilenodeids as they should be always full hash
    maybewdir = self.nodeconstants.wdirhex.startswith(id)
    # set instead of raising inside the ``try`` so that the ambiguity
    # error is not intercepted by the ``except error.RevlogError`` below
    ambiguous = False
    try:
        partial = self.index.partialmatch(id)
        if partial and self.hasnode(partial):
            if maybewdir:
                # single 'ff...' match in radix tree, ambiguous with wdir
                ambiguous = True
            else:
                return partial
        elif maybewdir:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        else:
            return None
    except error.RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            ambiguous = True
        # fall through to slow path that filters hidden revisions
    except (AttributeError, ValueError):
        # we are pure python, or key was too short to search radix tree
        pass
    if ambiguous:
        raise error.AmbiguousPrefixLookupError(
            id, self.display_id, _(b'ambiguous identifier')
        )

    # results of earlier slow-path lookups are remembered in self._pcache
    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        try:
            # hex(node)[:...]
            l = len(id) // 2  # grab an even number of digits
            prefix = bin(id[: l * 2])
            # first pass: cheap binary prefix filter on the node field
            nl = [e[7] for e in self.index if e[7].startswith(prefix)]
            # second pass: exact hex prefix check (covers an odd trailing
            # digit) restricted to nodes this revlog reports as present
            nl = [
                n for n in nl if hex(n).startswith(id) and self.hasnode(n)
            ]
            if self.nodeconstants.nullhex.startswith(id):
                nl.append(self.nullid)
            if len(nl) > 0:
                # only an unambiguous answer is cached
                if len(nl) == 1 and not maybewdir:
                    self._pcache[id] = nl[0]
                    return nl[0]
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            if maybewdir:
                raise error.WdirUnsupported
            return None
        except TypeError:
            pass

1590

1595

1591

def lookup(self, id):
    """Locate a node from one of:

    - a revision number or str(revision number)
    - a nodeid or a prefix of a hex nodeid

    Raises ``error.LookupError`` when nothing matches.
    """
    exact = self._match(id)
    if exact is not None:
        return exact

    partial = self._partialmatch(id)
    if not partial:
        raise error.LookupError(id, self.display_id, _(b'no match found'))
    return partial

1604

1609

1605

def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node.

    ``minlength`` is the smallest prefix length that will be tried.

    Raises ``error.LookupError`` when ``node`` cannot be found.
    """

    def isvalid(prefix):
        # True when ``prefix`` is usable: either _partialmatch() resolves
        # it to a node or it only designates the working directory.
        try:
            matchednode = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if matchednode is None:
            raise error.LookupError(node, self.display_id, _(b'no node'))
        return True

    def maybewdir(prefix):
        # a prefix made only of 'f' digits
        return all(c == b'f' for c in pycompat.iterbytestr(prefix))

    hexnode = hex(node)

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid."""
        # extend the prefix until it contains a digit other than 'f'
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if not maybewdir(prefix):
                return prefix

    # fast path: only taken when no revisions are filtered
    if not getattr(self, 'filteredrevs', None):
        try:
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except error.RevlogError:
            # for wdirid the error is swallowed and the slow path below
            # is used instead
            if node != self.nodeconstants.wdirid:
                raise error.LookupError(
                    node, self.display_id, _(b'no node')
                )
        except AttributeError:
            # Fall through to pure code
            pass

    if node == self.nodeconstants.wdirid:
        # wdirid is all 'f' digits, so disambiguate() is not applied here
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return prefix

    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if isvalid(prefix):
            return disambiguate(hexnode, length)

1655

1660

1656

def cmp(self, node, text):
    """Compare ``text`` with the revision identified by ``node``.

    The text is hashed together with the parents of ``node``; returns True
    when the resulting hash differs from ``node``.
    """
    parent1, parent2 = self.parents(node)
    computed = storageutil.hashrevisionsha1(text, parent1, parent2)
    return computed != node

1663

1668

1664

def _cachesegment(self, offset, data):

1669

def _cachesegment(self, offset, data):

1665

"""Add a segment to the revlog cache.

1670

"""Add a segment to the revlog cache.

1666

1671

1667

Accepts an absolute offset and the data that is at that location.

1672

Accepts an absolute offset and the data that is at that location.

1668

"""

1673

"""

1669

o, d = self._chunkcache

1674

o, d = self._chunkcache

1670

# try to add to existing cache

1675

# try to add to existing cache

1671

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1676

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1672

self._chunkcache = o, d + data

1677

self._chunkcache = o, d + data

1673

else:

1678

else:

1674

self._chunkcache = offset, data

1679

self._chunkcache = offset, data

1675

1680

1676

def _readsegment(self, offset, length, df=None):
    """Read a segment of raw data from the revlog file.

    Takes an absolute ``offset``, the ``length`` to read and an optional
    existing file handle ``df``. A handle that is passed in gets seeked and
    its previous position is NOT restored.

    Returns a str or buffer of raw byte data.

    Raises ``error.RevlogError`` when fewer bytes than requested could be
    read.
    """
    # Read a window aligned on the cache size around the requested data,
    # so that data both before and after it ends up in the cache. This
    # helps operations that read the revlog backwards.
    cachesize = self._chunkcachesize
    alignmask = ~(cachesize - 1)
    realoffset = offset & alignmask
    reallength = ((offset + length + cachesize) & alignmask) - realoffset
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        d = df.read(reallength)

    self._cachesegment(realoffset, d)

    if offset == realoffset and reallength == length:
        # the window is exactly the request: hand the data back as is
        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )
        return d

    startoffset = offset - realoffset
    available = len(d) - startoffset
    if available < length:
        raise error.RevlogError(
            _(
                b'partial read of revlog %s; expected %d bytes from '
                b'offset %d, got %d'
            )
            % (
                self._indexfile if self._inline else self._datafile,
                length,
                offset,
                available,
            )
        )
    return util.buffer(d, startoffset, length)

1735

1740

1736

def _getsegment(self, offset, length, df=None):

1741

def _getsegment(self, offset, length, df=None):

1737

"""Obtain a segment of raw data from the revlog.

1742

"""Obtain a segment of raw data from the revlog.

1738

1743

1739

Accepts an absolute offset, length of bytes to obtain, and an

1744

Accepts an absolute offset, length of bytes to obtain, and an

1740

optional file handle to the already-opened revlog. If the file

1745

optional file handle to the already-opened revlog. If the file

1741

handle is used, it's original seek position will not be preserved.

1746

handle is used, it's original seek position will not be preserved.

1742

1747

1743

Requests for data may be returned from a cache.

1748

Requests for data may be returned from a cache.

1744

1749

1745

Returns a str or a buffer instance of raw byte data.

1750

Returns a str or a buffer instance of raw byte data.

1746

"""

1751

"""

1747

o, d = self._chunkcache

1752

o, d = self._chunkcache

1748

l = len(d)

1753

l = len(d)

1749

1754

1750

# is it in the cache?

1755

# is it in the cache?

1751

cachestart = offset - o

1756

cachestart = offset - o

1752

cacheend = cachestart + length

1757

cacheend = cachestart + length

1753

if cachestart >= 0 and cacheend <= l:

1758

if cachestart >= 0 and cacheend <= l:

1754

if cachestart == 0 and cacheend == l:

1759

if cachestart == 0 and cacheend == l:

1755

return d # avoid a copy

1760

return d # avoid a copy

1756

return util.buffer(d, cachestart, cacheend - cachestart)

1761

return util.buffer(d, cachestart, cacheend - cachestart)

1757

1762

1758

return self._readsegment(offset, length, df=df)

1763

return self._readsegment(offset, length, df=df)

1759

1764

1760

def _getsegmentforrevs(self, startrev, endrev, df=None):

1765

def _getsegmentforrevs(self, startrev, endrev, df=None):

1761

"""Obtain a segment of raw data corresponding to a range of revisions.

1766

"""Obtain a segment of raw data corresponding to a range of revisions.

1762

1767

1763

Accepts the start and end revisions and an optional already-open

1768

Accepts the start and end revisions and an optional already-open

1764

file handle to be used for reading. If the file handle is read, its

1769

file handle to be used for reading. If the file handle is read, its

1765

seek position will not be preserved.

1770

seek position will not be preserved.

1766

1771

1767

Requests for data may be satisfied by a cache.

1772

Requests for data may be satisfied by a cache.

1768

1773

1769

Returns a 2-tuple of (offset, data) for the requested range of

1774

Returns a 2-tuple of (offset, data) for the requested range of

1770

revisions. Offset is the integer offset from the beginning of the

1775

revisions. Offset is the integer offset from the beginning of the

1771

revlog and data is a str or buffer of the raw byte data.

1776

revlog and data is a str or buffer of the raw byte data.

1772

1777

1773

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1778

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1774

to determine where each revision's data begins and ends.

1779

to determine where each revision's data begins and ends.

1775

"""

1780

"""

1776

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1781

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1777

# (functions are expensive).

1782

# (functions are expensive).

1778

index = self.index

1783

index = self.index

1779

istart = index[startrev]

1784

istart = index[startrev]

1780

start = int(istart[0] >> 16)

1785

start = int(istart[0] >> 16)

1781

if startrev == endrev:

1786

if startrev == endrev:

1782

end = start + istart[1]

1787

end = start + istart[1]

1783

else:

1788

else:

1784

iend = index[endrev]

1789

iend = index[endrev]

1785

end = int(iend[0] >> 16) + iend[1]

1790

end = int(iend[0] >> 16) + iend[1]

1786

1791

1787

if self._inline:

1792

if self._inline:

1788

start += (startrev + 1) * self.index.entry_size

1793

start += (startrev + 1) * self.index.entry_size

1789

end += (endrev + 1) * self.index.entry_size

1794

end += (endrev + 1) * self.index.entry_size

1790

length = end - start

1795

length = end - start

1791

1796

1792

return start, self._getsegment(start, length, df=df)

1797

return start, self._getsegment(start, length, df=df)

1793

1798

1794

def _chunk(self, rev, df=None):
    """Obtain a single decompressed chunk for a revision.

    Accepts an integer revision and an optional already-open file handle
    to be used for reading. If used, the seek position of the file will not
    be preserved.

    Returns a str holding uncompressed data for the requested revision.

    Raises ``error.RevlogError`` when the compression mode recorded in the
    index entry is not one of the known ``COMP_MODE_*`` values.
    """
    # field 10 of the index entry holds the compression mode of the chunk
    compression_mode = self.index[rev][10]
    data = self._getsegmentforrevs(rev, rev, df=df)[1]
    if compression_mode == COMP_MODE_PLAIN:
        return data
    elif compression_mode == COMP_MODE_DEFAULT:
        return self._decompressor(data)
    elif compression_mode == COMP_MODE_INLINE:
        return self.decompress(data)
    else:
        # error messages are bytes everywhere else in this file
        msg = b'unknown compression mode %d'
        msg %= compression_mode
        raise error.RevlogError(msg)

1815

1820

1816

def _chunks(self, revs, df=None, targetsize=None):
    """Obtain decompressed chunks for the specified revisions.

    Accepts an iterable of numeric revisions that are assumed to be in
    ascending order. Also accepts an optional already-open file handle
    to be used for reading. If used, the seek position of the file will
    not be preserved.

    This function is similar to calling ``self._chunk()`` multiple times,
    but is faster.

    Returns a list with decompressed data for each requested revision.

    Raises ``error.RevlogError`` when an index entry carries an unknown
    compression mode.
    """
    if not revs:
        return []
    # bind frequently used attributes to locals for the loops below
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self.index.entry_size
    buffer = util.buffer

    l = []
    ladd = l.append

    if not self._withsparseread:
        slicedchunks = (revs,)
    else:
        slicedchunks = deltautil.slicechunk(
            self, revs, targetsize=targetsize
        )

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            #
            # Read this slice one revision at a time, but keep what was
            # already collected from earlier slices and carry on with the
            # remaining ones; returning only this slice would drop data
            # whenever sparse-read produced more than one slice.
            for rev in revschunk:
                ladd(self._chunk(rev, df=df))
            continue

        decomp = self.decompress
        # self._decompressor might be None, but will not be used in that case
        def_decomp = self._decompressor
        for rev in revschunk:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            comp_mode = self.index[rev][10]
            c = buffer(data, chunkstart - offset, chunklength)
            if comp_mode == COMP_MODE_PLAIN:
                ladd(c)
            elif comp_mode == COMP_MODE_INLINE:
                ladd(decomp(c))
            elif comp_mode == COMP_MODE_DEFAULT:
                ladd(def_decomp(c))
            else:
                # error messages are bytes everywhere else in this file
                msg = b'unknown compression mode %d'
                msg %= comp_mode
                raise error.RevlogError(msg)

    return l

1883

1888

1884

def _chunkclear(self):

1889

def _chunkclear(self):

1885

"""Clear the raw chunk cache."""

1890

"""Clear the raw chunk cache."""

1886

self._chunkcache = (0, b'')

1891

self._chunkcache = (0, b'')

1887

1892

1888

def deltaparent(self, rev):
    """Return the revision that ``rev`` is stored as a delta against.

    ``nullrev`` is returned when the base recorded for ``rev`` is ``rev``
    itself. Otherwise the recorded base is returned when general delta is
    in use, and the previous revision when it is not.
    """
    base = self.index[rev][3]
    if base == rev:
        return nullrev
    if self._generaldelta:
        return base
    return rev - 1

1897

1902

1898

def issnapshot(self, rev):
    """Tell whether ``rev`` is a snapshot."""
    if not self._sparserevlog:
        return self.deltaparent(rev) == nullrev

    if util.safehasattr(self.index, b'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)

    if rev == nullrev:
        return True

    entry = self.index[rev]
    base = entry[3]
    if base == rev or base == nullrev:
        return True

    parent1 = entry[5]
    parent2 = entry[6]
    if base == parent1 or base == parent2:
        # a delta against one of the parents is not a snapshot
        return False
    # otherwise the answer is the one for the delta base
    return self.issnapshot(base)

1919

1924

1920

def snapshotdepth(self, rev):
    """number of snapshot in the chain before this one

    Raises ``error.ProgrammingError`` when ``rev`` is not a snapshot.
    """
    if not self.issnapshot(rev):
        # interpolate the revision number; the message used to be raised
        # with a literal, unformatted "%d"
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    # the delta chain includes ``rev`` itself, hence the -1
    return len(self._deltachain(rev)[0]) - 1

1925

1930

1926

def revdiff(self, rev1, rev2):
    """Return a delta turning revision ``rev1`` into revision ``rev2``.

    The delta is in binary form and is meant to be written to revlog data
    directly, so it is computed from raw revision data.
    """
    # reuse the stored chunk when rev2 is already a delta against rev1
    storedasdelta = rev1 != nullrev and self.deltaparent(rev2) == rev1
    if storedasdelta:
        return bytes(self._chunk(rev2))

    oldraw = self.rawdata(rev1)
    newraw = self.rawdata(rev2)
    return mdiff.textdiff(oldraw, newraw)

1936

1941

1937

def _processflags(self, text, flags, operation, raw=False):
    """Deprecated entry point to the flag processors.

    Emits a deprecation warning, then dispatches to the ``flagutil``
    helper matching ``raw`` and ``operation``.
    """
    util.nouideprecwarn(
        b'_processflag(...) use the specialized variant',
        b'5.2',
        stacklevel=2,
    )
    if raw:
        return text, flagutil.processflagsraw(self, text, flags)
    if operation == b'read':
        return flagutil.processflagsread(self, text, flags)
    # anything else is treated as a write operation
    return flagutil.processflagswrite(self, text, flags)

1947

1952

1948

def revision(self, nodeorrev, _df=None, raw=False):
    """Return the uncompressed revision for a node or revision number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. 'raw' should be set
    to True when generating changegroups or in debug commands. Passing
    raw=True is deprecated and triggers a deprecation warning.
    """
    if raw:
        warning = (
            b'revlog.revision(..., raw=True) is deprecated, '
            b'use revlog.rawdata(...)'
        )
        util.nouideprecwarn(warning, b'5.2', stacklevel=2)
    revdata = self._revisiondata(nodeorrev, _df, raw=raw)
    return revdata[0]

1964

1969

1965

def sidedata(self, nodeorrev, _df=None):
    """Return a map of extra data related to the changeset but not part
    of the hash.

    This function currently returns a dictionary. However, a more advanced
    mapping object will likely be used in the future for more
    efficient/lazy code.
    """
    revdata = self._revisiondata(nodeorrev, _df)
    return revdata[1]

1973

1978

1974

def _revisiondata(self, nodeorrev, _df=None, raw=False):
    """Return ``(text, sidedata)`` for a node or revision number.

    nodeorrev - either an integer revision number or a node id
    _df - an existing file handle to read from. (internal-only)
    raw - when true, the stored raw text is returned instead of the
          text produced by the read flag processors
    """
    # Normalise the argument into a (rev, node) pair; ``rev`` stays unknown
    # when a node was passed in.
    if isinstance(nodeorrev, int):
        rev, node = nodeorrev, self.node(nodeorrev)
    else:
        rev, node = None, nodeorrev

    # The null revision is always empty, no lookup needed.
    if node == self.nullid:
        return b"", {}

    # ``rawtext`` is the text exactly as stored in the revlog. It may be the
    # revision itself or may still need flag processing.
    rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

    sidedata = {}
    if self.hassidedata:
        if rev is None:
            rev = self.rev(node)
        sidedata = self._sidedata(rev)

    # Raw text that came out of the cache was already validated: done.
    if raw and validated:
        return rawtext, sidedata

    if rev is None:
        rev = self.rev(node)
    # Flags stored for this revision (they usually alter its state or
    # content).
    flags = self.flags(rev)

    # Without extra flags no flag processor runs, so text == rawtext.
    if validated and flags == REVIDX_DEFAULT_FLAGS:
        return rawtext, sidedata

    if raw:
        text = rawtext
        validatehash = flagutil.processflagsraw(self, rawtext, flags)
    else:
        text, validatehash = flagutil.processflagsread(self, rawtext, flags)

    if validatehash:
        self.checkhash(text, node, rev=rev)
    if not validated:
        # ``_rawtext`` dropped the cache; repopulate it with this revision.
        self._revisioncache = (node, rev, rawtext)

    return text, sidedata

2024

2029

2025

def _rawtext(self, node, rev, _df=None):
    """Return the possibly unvalidated rawtext for a revision.

    The result is a ``(rev, rawtext, validated)`` tuple. ``validated`` is
    True only when the text was served straight from the revision cache.
    """
    # Revision held by the cache, usable as a stopping point for the delta
    # chain. The cache entry looks like (node, rev, rawtext).
    stoprev = None
    if self._revisioncache:
        if self._revisioncache[0] == node:
            return (rev, self._revisioncache[2], True)
        stoprev = self._revisioncache[1]

    if rev is None:
        rev = self.rev(node)

    chain, stopped = self._deltachain(rev, stoprev=stoprev)
    # When the chain stopped at the cached revision, its text is the base
    # the remaining deltas apply to.
    base = self._revisioncache[2] if stopped else None

    # Drop the cache to save memory; the caller is expected to update
    # self._revisioncache once the text has been validated.
    self._revisioncache = None

    rawsize = self.index[rev][2]
    targetsize = 4 * rawsize if rawsize >= 0 else None

    deltas = self._chunks(chain, df=_df, targetsize=targetsize)
    if base is None:
        # No cached base: the first chunk of the chain is the full text.
        base, deltas = bytes(deltas[0]), deltas[1:]

    rawtext = mdiff.patches(base, deltas)
    del base  # give the base text a chance to be freed early
    return (rev, rawtext, False)

2067

2072

2068

def _sidedata(self, rev):
    """Return the sidedata for a given revision number.

    Returns an empty dict when the index records a sidedata size of zero.
    Otherwise the stored segment is read, decoded according to the
    compression mode recorded in the index entry and deserialized.

    Raises ``error.RevlogError`` when the compression mode is unknown.
    """
    index_entry = self.index[rev]
    sidedata_offset = index_entry[8]
    sidedata_size = index_entry[9]

    # Nothing stored for this revision.
    if sidedata_size == 0:
        return {}

    if self._inline:
        # For inline revlogs the offset is shifted by the size of the
        # ``rev + 1`` index entries.
        sidedata_offset += self.index.entry_size * (1 + rev)

    comp_segment = self._getsegment(sidedata_offset, sidedata_size)
    comp = index_entry[11]
    if comp == COMP_MODE_PLAIN:
        segment = comp_segment
    elif comp == COMP_MODE_DEFAULT:
        segment = self._decompressor(comp_segment)
    elif comp == COMP_MODE_INLINE:
        segment = self.decompress(comp_segment)
    else:
        # Error messages are bytes everywhere else in this file.
        msg = b'unknown compression mode %d'
        msg %= comp
        raise error.RevlogError(msg)

    return sidedatautil.deserialize_sidedata(segment)

2094

2099

2095

def rawdata(self, nodeorrev, _df=None):
    """Return the uncompressed raw data of a given node or revision number.

    This is the first element of ``_revisiondata(..., raw=True)``.

    _df - an existing file handle to read from. (internal-only)
    """
    result = self._revisiondata(nodeorrev, _df, raw=True)
    return result[0]

2101

2106

2102

def hash(self, text, p1, p2):
    """Compute the node hash for ``text`` with parents ``p1`` and ``p2``.

    Delegates to ``storageutil.hashrevisionsha1``. Kept as a method so
    that subclasses can replace the hash as needed.
    """
    digest = storageutil.hashrevisionsha1(text, p1, p2)
    return digest

2109

2114

2110

def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check that ``node`` is the hash of ``text`` and its parents.

    When neither parent is given, both are looked up from ``node``.
    Raises ``error.RevlogError`` on mismatch, or
    ``error.CensoredNodeError`` when the revlog is censorable and the text
    is a censored one.

    Available as a method so that subclasses can extend hash mismatch
    behaviors as needed.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node == self.hash(text, p1, p2):
            return

        # Clear the revision cache on hash failure. The cache only stores
        # the raw revision, so clearing it means a later raw access will
        # miss the cache. That case should be rare, and teaching the cache
        # about the hash verification state would be extra work.
        if self._revisioncache and self._revisioncache[0] == node:
            self._revisioncache = None

        # Identify the revision by number when known, else by short hash.
        if rev is not None:
            revornode = rev
        else:
            revornode = templatefilters.short(hex(node))
        raise error.RevlogError(
            _(b"integrity check failed on %s:%s")
            % (self.display_id, pycompat.bytestr(revornode))
        )
    except error.RevlogError:
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self.display_id, node, text)
        raise

2140

2145

2141

def _enforceinlinesize(self, tr):
    """Check if the revlog is too big for inline and convert if so.

    This should be called after revisions are added to the revlog. If the
    revlog has grown too large to be an inline revlog, it will convert it
    to use multiple index and data files.

    tr - the transaction; the index file must already be registered in it,
         otherwise ``error.RevlogError`` is raised.
    """
    tiprev = len(self) - 1
    total_size = self.start(tiprev) + self.length(tiprev)
    if not self._inline or total_size < _maxinline:
        return

    troffset = tr.findoffset(self._indexfile)
    if troffset is None:
        raise error.RevlogError(
            _(b"%s not found in the transaction") % self._indexfile
        )
    # First revision located at or after the offset the transaction
    # recorded for the inline file; resolved while copying the data below.
    trindex = None
    tr.add(self._datafile, 0)

    existing_handles = False
    if self._writinghandles is not None:
        existing_handles = True
        fp = self._writinghandles[0]
        fp.flush()
        fp.close()
        # We can't use the cached file handle after close(). So prevent
        # its usage.
        self._writinghandles = None

    new_dfh = self._datafp(b'w+')
    new_dfh.truncate(0)  # drop any potentially existing data
    try:
        with self._indexfp() as read_ifh:
            for r in self:
                new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                # Only the first match matters: later revisions also lie
                # past the offset and must not overwrite it, otherwise the
                # offsets registered below would be too large.
                if (
                    trindex is None
                    and troffset <= self.start(r) + r * self.index.entry_size
                ):
                    trindex = r
            new_dfh.flush()

        if trindex is None:
            trindex = 0

        with self.__index_new_fp() as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0 and self._docket is None:
                    # Without a docket the first entry carries the header.
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)
            if self._docket is not None:
                self._docket.index_end = fp.tell()

            # There is a small transactional race here. If the rename of
            # the index fails, we should remove the datafile. It is more
            # important to ensure that the data file is not truncated
            # when the index is replaced as otherwise data is lost.
            tr.replace(self._datafile, self.start(trindex))

            # The temp file replaces the real index when we exit the
            # context manager.

        tr.replace(self._indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

        if existing_handles:
            # Switched from inline to conventional: reopen the index and
            # hand the new data file handle over to the writing handles.
            ifh = self.__index_write_fp()
            self._writinghandles = (ifh, new_dfh)
            new_dfh = None
    finally:
        if new_dfh is not None:
            new_dfh.close()

2215

2220

2216

def _nodeduplicatecallback(self, transaction, node):

2221

def _nodeduplicatecallback(self, transaction, node):

2217

"""called when trying to add a node already stored."""

2222

"""called when trying to add a node already stored."""

2218

2223

2219

@contextlib.contextmanager

2224

@contextlib.contextmanager

2220

def _writing(self, transaction):

2225

def _writing(self, transaction):

2221

if self._trypending:

2226

if self._trypending:

2222

msg = b'try to write in a `trypending` revlog: %s'

2227

msg = b'try to write in a `trypending` revlog: %s'

2223

msg %= self.display_id

2228

msg %= self.display_id

2224

raise error.ProgrammingError(msg)

2229

raise error.ProgrammingError(msg)

2225

if self._writinghandles is not None:

2230

if self._writinghandles is not None:

2226

yield

2231

yield

2227

else:

2232

else:

2228

r = len(self)

2233

r = len(self)

2229

dsize = 0

2234

dsize = 0

2230

if r:

2235

if r:

2231

dsize = self.end(r - 1)

2236

dsize = self.end(r - 1)

2232

dfh = None

2237

dfh = None

2233

if not self._inline:

2238

if not self._inline:

2234

try:

2239

try:

2235

dfh = self._datafp(b"r+")

2240

dfh = self._datafp(b"r+")

2236

if self._docket is None:

2241

if self._docket is None:

2237

dfh.seek(0, os.SEEK_END)

2242

dfh.seek(0, os.SEEK_END)

2238

else:

2243

else:

2239

dfh.seek(self._docket.data_end, os.SEEK_SET)

2244

dfh.seek(self._docket.data_end, os.SEEK_SET)

2240

except IOError as inst:

2245

except IOError as inst:

2241

if inst.errno != errno.ENOENT:

2246

if inst.errno != errno.ENOENT:

2242

raise

2247

raise

2243

dfh = self._datafp(b"w+")

2248

dfh = self._datafp(b"w+")

2244

transaction.add(self._datafile, dsize)

2249

transaction.add(self._datafile, dsize)

2245

try:

2250

try:

2246

isize = r * self.index.entry_size

2251

isize = r * self.index.entry_size

2247

ifh = self.__index_write_fp()

2252

ifh = self.__index_write_fp()

2248

if self._inline:

2253

if self._inline:

2249

transaction.add(self._indexfile, dsize + isize)

2254

transaction.add(self._indexfile, dsize + isize)

2250

else:

2255

else:

2251

transaction.add(self._indexfile, isize)

2256

transaction.add(self._indexfile, isize)

2252

try:

2257

try:

2253

self._writinghandles = (ifh, dfh)

2258

self._writinghandles = (ifh, dfh)

2254

try:

2259

try:

2255

yield

2260

yield

2256

if self._docket is not None:

2261

if self._docket is not None:

2257

self._write_docket(transaction)

2262

self._write_docket(transaction)

2258

finally:

2263

finally:

2259

self._writinghandles = None

2264

self._writinghandles = None

2260

finally:

2265

finally:

2261

ifh.close()

2266

ifh.close()

2262

finally:

2267

finally:

2263

if dfh is not None:

2268

if dfh is not None:

2264

dfh.close()

2269

dfh.close()

2265

2270

2266

def _write_docket(self, transaction):

2271

def _write_docket(self, transaction):

2267

"""write the current docket on disk

2272

"""write the current docket on disk

2268

2273

2269

Exist as a method to help changelog to implement transaction logic

2274

Exist as a method to help changelog to implement transaction logic

2270

2275

2271

We could also imagine using the same transaction logic for all revlog

2276

We could also imagine using the same transaction logic for all revlog

2272

since docket are cheap."""

2277

since docket are cheap."""

2273

self._docket.write(transaction)

2278

self._docket.write(transaction)

2274

2279

2275

def addrevision(
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=REVIDX_DEFAULT_FLAGS,
    deltacomputer=None,
    sidedata=None,
):
    """Add a revision to the log and return its revision number.

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    sidedata - an optional mapping of sidedata; only accepted non-empty
        when the revlog supports sidedata

    If the node is already present in the index, its existing revision
    number is returned and nothing is added.
    """
    if link == nullrev:
        raise error.RevlogError(
            _(b"attempted to add linkrev -1 to %s") % self.display_id
        )

    if sidedata is None:
        sidedata = {}
    elif sidedata and not self.hassidedata:
        raise error.ProgrammingError(
            _(b"trying to add sidedata to a revlog who don't support them")
        )

    # With flags set, the node is computed from the unprocessed text.
    if flags and not node:
        node = self.hash(text, p1, p2)

    rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

    # If the flag processor modified the revision data, any provided
    # cachedelta no longer applies.
    if rawtext != text:
        cachedelta = None

    rawsize = len(rawtext)
    if rawsize > _maxentrysize:
        raise error.RevlogError(
            _(
                b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
            )
            % (self.display_id, rawsize)
        )

    if not node:
        node = self.hash(rawtext, p1, p2)

    existing = self.index.get_rev(node)
    if existing is not None:
        return existing

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=cachedelta,
        deltacomputer=deltacomputer,
        sidedata=sidedata,
    )

2352

2357

2353

def addrawrevision(
    self,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    node,
    flags,
    cachedelta=None,
    deltacomputer=None,
    sidedata=None,
):
    """Add a raw revision with known flags, node and parents.

    Useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).

    The addition runs inside the ``_writing`` context and the value
    returned by ``_addrevision`` is passed back to the caller.
    """
    with self._writing(transaction):
        added = self._addrevision(
            node,
            rawtext,
            transaction,
            link,
            p1,
            p2,
            flags,
            cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
        return added

2383

2388

2384

def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, payload)`` pair. The header is ``b'u'`` when the
    data is stored uncompressed and does not start with a NUL byte, and
    empty in every other case.
    """
    if not data:
        return b'', data

    packed = self._compressor.compress(data)
    if packed:
        # The revlog compressor added the header in the returned data.
        return b'', packed

    # Stored as-is: data starting with NUL needs no marker, anything else
    # is tagged with 'u'.
    header = b'' if data[0:1] == b'\0' else b'u'
    return header, data

2398

2403

2399

def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    Empty input is returned unchanged.

    Raises ``error.RevlogError`` when zlib fails to decompress the chunk.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # Assumptions we can make about revlogs:
    #
    # 1) most chunks are compressed (as opposed to inline raw data);
    # 2) decompressing *any* data will likely be at least 10x slower than
    #    returning raw inline data;
    # 3) common and officially supported compression engines should be
    #    prioritized.
    #
    # So the "compressed with a common, officially supported engine" case
    # is optimized over "raw data" and "data encoded by less common or
    # non-official engines": the inline header checks come first, followed
    # by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    header = data[0:1]

    if header == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(e)
            )
    # '\0' is more common than 'u' so it goes first.
    if header == b'\0':
        return data
    if header == b'u':
        # Skip the one-byte 'u' marker without copying the payload.
        return util.buffer(data, 1)

    engine = self._get_decompressor(header)
    return engine.decompress(data)

2448

2453

2449

def _addrevision(
    self,
    node,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    flags,
    cachedelta,
    alwayscache=False,
    deltacomputer=None,
    sidedata=None,
):
    """Append a single revision to the log (internal helper).

    See ``addrevision`` for the meaning of the arguments.  The difference
    between the two is that ``addrevision`` takes non-raw text while this
    method takes raw text.

    When ``deltacomputer`` is None, a ``deltautil.deltacomputer`` bound to
    this revlog is created and used.

    Invariants:
    - ``rawtext`` may be None, in which case ``cachedelta`` must be set.
      When both are provided they must correspond to each other.

    Raises ``error.RevlogError`` when ``node`` is the null id or one of the
    working-directory ids, and ``error.ProgrammingError`` when no writing
    handles are open.

    Returns the revision number given to the new entry.
    """
    if node == self.nullid:
        raise error.RevlogError(
            _(b"%s: attempt to add null revision") % self.display_id
        )
    is_wdir_node = (
        node == self.nodeconstants.wdirid
        or node in self.nodeconstants.wdirfilenodeids
    )
    if is_wdir_node:
        raise error.RevlogError(
            _(b"%s: attempt to add wdir revision") % self.display_id
        )
    if self._writinghandles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)

    # Inline revlogs keep revision data in the index file (handle 0);
    # otherwise the data file handle (handle 1) is the one to read from.
    fh = self._writinghandles[0 if self._inline else 1]

    # Single-item list so the delta computer can fill in the text lazily.
    btext = [rawtext]

    curr = len(self)
    offset = self._get_data_offset(curr - 1)

    if self._concurrencychecker:
        ifh, dfh = self._writinghandles
        index_bytes = curr * self.index.entry_size
        if self._inline:
            # offset is "as if" it were in the .d file, so the size of the
            # entry metadata has to be added on top of it.
            self._concurrencychecker(
                ifh, self._indexfile, offset + index_bytes
            )
        else:
            # Entries in the .i are a consistent size.
            self._concurrencychecker(ifh, self._indexfile, index_bytes)
            self._concurrencychecker(dfh, self._datafile, offset)

    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is not None:
        textlen = len(rawtext)
    else:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(
            revlog.size(self, cachedelta[0]), cachedelta[1]
        )

    if deltacomputer is None:
        deltacomputer = deltautil.deltacomputer(self)

    revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
    deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

    # Decide how the compression of the revision data is recorded in the
    # index entry.  Only revlogs with a docket use non-inline modes.
    compression_mode = COMP_MODE_INLINE
    if self._docket is not None:
        header, payload = deltainfo.data
        if header == b'u':
            # we have a more efficient way to declare uncompressed
            compression_mode = COMP_MODE_PLAIN
            deltainfo = deltautil.drop_u_compression(deltainfo)
        elif not header:
            if not payload:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            else:
                marker = payload[0:1]
                if marker == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif marker == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT

    sidedata_compression_mode = COMP_MODE_INLINE
    if sidedata and self.hassidedata:
        sidedata_compression_mode = COMP_MODE_PLAIN
        serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
        sidedata_offset = offset + deltainfo.deltalen
        h, comp_sidedata = self.compress(serialized_sidedata)
        # Keep the compressed form only when it is genuinely compressed
        # and smaller than the plain serialization.
        if (
            h != b'u'
            and comp_sidedata[0:1] != b'\0'
            and len(comp_sidedata) < len(serialized_sidedata)
        ):
            assert not h
            uses_default_engine = (
                comp_sidedata[0:1]
                == self._docket.default_compression_header
            )
            if uses_default_engine:
                sidedata_compression_mode = COMP_MODE_DEFAULT
            else:
                sidedata_compression_mode = COMP_MODE_INLINE
            serialized_sidedata = comp_sidedata
    else:
        serialized_sidedata = b""
        # Don't store the offset if the sidedata is empty, that way
        # we can easily detect empty sidedata and they will be no different
        # than ones we manually add.
        sidedata_offset = 0

    index_entry = (
        offset_type(offset, flags),
        deltainfo.deltalen,
        textlen,
        deltainfo.base,
        link,
        p1r,
        p2r,
        node,
        sidedata_offset,
        len(serialized_sidedata),
        compression_mode,
        sidedata_compression_mode,
    )
    self.index.append(index_entry)

    entry = self.index.entry_binary(curr)
    if curr == 0 and self._docket is None:
        # Without a docket, the format header is written together with
        # the very first entry.
        format_header = self._format_flags | self._format_version
        format_header = self.index.pack_header(format_header)
        entry = format_header + entry
    self._writeentry(
        transaction,
        entry,
        deltainfo.data,
        link,
        offset,
        serialized_sidedata,
    )

    # The delta computer may have filled in the text while searching.
    rawtext = btext[0]
    if alwayscache and rawtext is None:
        rawtext = deltacomputer.buildtext(revinfo, fh)

    if type(rawtext) is bytes:  # only accept immutable objects
        self._revisioncache = (node, curr, rawtext)
    self._chainbasecache[curr] = deltainfo.chainbase
    return curr

2624

2629

2625

def _get_data_offset(self, prev):

2630

def _get_data_offset(self, prev):

2626

"""Returns the current offset in the (in-transaction) data file.

2631

"""Returns the current offset in the (in-transaction) data file.

2627

Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket

2632

Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket

2628

file to store that information: since sidedata can be rewritten to the

2633

file to store that information: since sidedata can be rewritten to the

2629

end of the data file within a transaction, you can have cases where, for

2634

end of the data file within a transaction, you can have cases where, for

2630

example, rev `n` does not have sidedata while rev `n - 1` does, leading

2635

example, rev `n` does not have sidedata while rev `n - 1` does, leading

2631

to `n - 1`'s sidedata being written after `n`'s data.

2636

to `n - 1`'s sidedata being written after `n`'s data.

2632

2637

2633

TODO cache this in a docket file before getting out of experimental."""

2638

TODO cache this in a docket file before getting out of experimental."""

2634

if self._docket is None:

2639

if self._docket is None:

2635

return self.end(prev)

2640

return self.end(prev)

2636

else:

2641

else:

2637

return self._docket.data_end

2642

return self._docket.data_end

2638

2643

2639

def _writeentry(self, transaction, entry, data, link, offset, sidedata):
    """Write an index entry and its revision data through the open handles.

    ``entry`` is the packed index entry, ``data`` a two-item sequence
    written in order (the first item is skipped when empty for non-inline
    revlogs), ``offset`` the data offset registered with ``transaction``
    and ``sidedata`` optional bytes written right after the data.
    ``link`` is accepted but not used here.

    Raises ``error.ProgrammingError`` when no writing handles are open.
    """
    # Files opened in a+ mode behave inconsistently across platforms.
    # Windows requires a file positioning call whenever a handle switches
    # between reading and writing (see 3686fa2b8eee and the
    # mixedfilemodewrapper in windows.py).  Elsewhere Python or the
    # platform itself can be buggy: some versions of Solaris have been
    # observed to not append at the end of the file if the file was
    # seeked to before the end (see issue4943).
    #
    # The workaround is an explicit seek() before writing.  This is likely
    # unnecessary on Python 3, but since the handle is reused for reads
    # and may have been seeked there, be careful before changing this.
    if self._writinghandles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)
    ifh, dfh = self._writinghandles

    if self._docket is None:
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)
    else:
        ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            dfh.seek(self._docket.data_end, os.SEEK_SET)

    curr = len(self) - 1
    if self._inline:
        # Data lives in the index file, after all the entries so far.
        offset += curr * self.index.entry_size
        transaction.add(self._indexfile, offset)
        for piece in (entry, data[0], data[1]):
            ifh.write(piece)
        if sidedata:
            ifh.write(sidedata)
        self._enforceinlinesize(transaction)
    else:
        transaction.add(self._datafile, offset)
        transaction.add(self._indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        if sidedata:
            dfh.write(sidedata)
        ifh.write(entry)

    if self._docket is not None:
        # Read the handles back from self rather than reusing the locals.
        handles = self._writinghandles
        self._docket.index_end = handles[0].tell()
        self._docket.data_end = handles[1].tell()

    nodemaputil.setup_persistent_nodemap(transaction, self)

2690

2695

2691

def addgroup(
    self,
    deltas,
    linkmapper,
    transaction,
    alwayscache=False,
    addrevisioncb=None,
    duplicaterevisioncb=None,
):
    """Add a group of deltas to the revision log.

    ``deltas`` yields 8-tuples ``(node, p1, p2, linknode, deltabase,
    delta, flags, sidedata)``.  The first delta is against its parent,
    which should be in our log, the rest are against the previous delta.

    If ``addrevisioncb`` is defined, it is called with this revlog and the
    revision number that was just added.  If ``duplicaterevisioncb`` is
    defined, it is called with this revlog and the revision number of a
    node that was already present.

    Raises ``error.ProgrammingError`` on nested calls,
    ``error.LookupError`` for an unknown parent or delta base and
    ``error.CensoredBaseError`` when a delta against a censored base is
    not a full replacement.

    Returns True when at least one delta was handled (added or found to
    be a duplicate), False when ``deltas`` was empty.
    """
    if self._adding_group:
        raise error.ProgrammingError(b'cannot nest addgroup() calls')

    self._adding_group = True
    empty = True
    try:
        with self._writing(transaction):
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for (
                node,
                p1,
                p2,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            ) in deltas:
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                known_rev = self.index.get_rev(node)
                if known_rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, known_rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, known_rev)
                    empty = False
                    continue

                for parent in (p1, p2):
                    if not self.index.has_node(parent):
                        raise error.LookupError(
                            parent, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement
                    # in a single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    expected = mdiff.replacediffheader(oldlen, newlen)
                    if delta[:hlen] != expected:
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this
                # case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as
                # raw by the flagprocessor.
                added_rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, added_rev)
                empty = False
    finally:
        self._adding_group = False
    return not empty

2799

2804

2800

def iscensored(self, rev):
    """Check if a file revision is censored.

    Returns False for a revlog that is not censorable; otherwise returns
    the ``REVIDX_ISCENSORED`` bit of the revision's flags (truthy when the
    revision is censored).
    """
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED
    return False

2806

2811

2807

def _peek_iscensored(self, baserev, delta):

2812

def _peek_iscensored(self, baserev, delta):

2808

"""Quickly check if a delta produces a censored revision."""

2813

"""Quickly check if a delta produces a censored revision."""

2809

if not self._censorable:

2814

if not self._censorable:

2810

return False

2815

return False

2811

2816

2812

return storageutil.deltaiscensored(delta, baserev, self.rawsize)

2817

return storageutil.deltaiscensored(delta, baserev, self.rawsize)

2813

2818

2814

def getstrippoint(self, minlink):
    """Find the minimum rev that must be stripped to strip the linkrev.

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.  The computation is
    delegated to ``storageutil.resolvestripinfo``.
    """
    tiprev = len(self) - 1
    heads = self.headrevs()
    return storageutil.resolvestripinfo(
        minlink, tiprev, heads, self.linkrev, self.parentrevs
    )

2827

2832

2828

def strip(self, minlink, transaction):
    """Truncate the revlog on the first revision with a linkrev >= minlink.

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.

    Nothing is done for an empty revlog or when the strip point equals
    ``len(self)``.
    """
    if len(self) == 0:
        return

    striprev, _brokenrevs = self.getstrippoint(minlink)
    if striprev == len(self):
        return

    # first truncate the files on disk
    data_end = self.start(striprev)
    if self._inline:
        # Data is interleaved with the entries in the index file.
        index_end = data_end + (striprev * self.index.entry_size)
    else:
        transaction.add(self._datafile, data_end)
        index_end = striprev * self.index.entry_size

    transaction.add(self._indexfile, index_end)
    if self._docket is not None:
        # XXX we could leverage the docket while stripping. However it is
        # not powerful enough at the time of this comment
        self._docket.index_end = index_end
        self._docket.data_end = data_end
        self._docket.write(transaction, stripping=True)

    # then reset internal state in memory to forget those revisions
    self._revisioncache = None
    self._chaininfocache = util.lrucachedict(500)
    self._chunkclear()

    del self.index[striprev:-1]

2871

2876

2872

def checksize(self):
    """Check size of index and data files

    return a (dd, di) tuple.
    - dd: extra bytes for the "data" file
    - di: extra bytes for the "index" file

    A healthy revlog will return (0, 0).

    A file that does not exist (``ENOENT``) contributes 0; any other
    ``IOError`` is propagated.  For an inline revlog ``dd`` is always 0
    and ``di`` accounts for both the entries and the revision data stored
    in the index file.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        dd = actual - expected
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        f = self.opener(self._indexfile)
        try:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        finally:
            # Always release the handle, even when seek()/tell() fail.
            f.close()
        s = self.index.entry_size
        i = max(0, actual // s)
        # Bytes left over after the last complete index entry.
        di = actual - (i * s)
        if self._inline:
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)

2915

2920

2916

def files(self):
    """Return the list of files backing this revlog.

    The index file always comes first; the data file is included only
    when the revlog is not inline.
    """
    paths = [self._indexfile]
    if self._inline:
        return paths
    paths.append(self._datafile)
    return paths

2921

2926

2922

def emitrevisions(
    self,
    nodes,
    nodesorder=None,
    revisiondata=False,
    assumehaveparentrevisions=False,
    deltamode=repository.CG_DELTAMODE_STD,
    sidedata_helpers=None,
):
    """Emit revision deltas for ``nodes`` via ``storageutil.emitrevisions``.

    ``nodesorder`` must be one of ``b'nodes'``, ``b'storage'``,
    ``b'linear'`` or None, otherwise ``error.ProgrammingError`` is raised.
    None becomes ``b'storage'`` when this revlog does not use general
    delta.  When delta chains are not stored, ``deltamode`` is forced to
    ``CG_DELTAMODE_FULL`` unless ``CG_DELTAMODE_PREV`` was requested.
    """
    accepted_orders = (b'nodes', b'storage', b'linear', None)
    if nodesorder not in accepted_orders:
        raise error.ProgrammingError(
            b'unhandled value for nodesorder: %s' % nodesorder
        )

    if nodesorder is None and not self._generaldelta:
        nodesorder = b'storage'

    if not self._storedeltachains:
        if deltamode != repository.CG_DELTAMODE_PREV:
            deltamode = repository.CG_DELTAMODE_FULL

    return storageutil.emitrevisions(
        self,
        nodes,
        nodesorder,
        revlogrevisiondelta,
        deltaparentfn=self.deltaparent,
        candeltafn=self.candelta,
        rawsizefn=self.rawsize,
        revdifffn=self.revdiff,
        flagsfn=self.flags,
        deltamode=deltamode,
        revisiondata=revisiondata,
        assumehaveparentrevisions=assumehaveparentrevisions,
        sidedata_helpers=sidedata_helpers,
    )

2960

2965

2961

# Delta reuse modes; ``clone()`` takes one of these as its ``deltareuse``
# argument (see its docstring for what each mode means).
DELTAREUSEALWAYS = b'always'
DELTAREUSESAMEREVS = b'samerevs'
DELTAREUSENEVER = b'never'

DELTAREUSEFULLADD = b'fulladd'

# Every mode defined above, as a set.
DELTAREUSEALL = {
    DELTAREUSEALWAYS,
    DELTAREUSESAMEREVS,
    DELTAREUSENEVER,
    DELTAREUSEFULLADD,
}

2968

2973

2969

def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedata_helpers=None,
):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog ends up with the same revisions and nodes as
    this one, but it is not guaranteed to be bit-for-bit identical (delta
    encoding, for instance, may differ).

    ``deltareuse`` controls how deltas stored in this revlog are carried
    over to the destination. It must be one of:

    DELTAREUSEALWAYS
      Always reuse deltas when possible, even if the destination revlog
      would not have picked the same delta base. Fastest mode.
    DELTAREUSESAMEREVS
      Reuse a delta only if the destination revlog would pick the same
      revisions for it. A balance between speed and optimization; this
      is the default.
    DELTAREUSENEVER
      Never reuse deltas. Slowest mode; useful to recompute every delta
      (e.g. after the diff/delta algorithm changed).
    DELTAREUSEFULLADD
      Re-add each revision as if it were new content. Slower than
      DELTAREUSEALWAYS, but lets more mechanisms kick in (e.g. large
      file detection and handling).

    Delta computation can be slow, so the policy picked here can
    significantly affect run time. With the default policy, converting a
    non-generaldelta revlog to a generaldelta one recomputes a delta
    whenever its stored base is not a parent of the revision.

    ``forcedeltabothparents`` controls whether deltas against both
    parents are force-computed for merges. When it is falsy the
    destination revlog's current setting is kept.

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.

    ``addrevisioncb``, when set, is forwarded to ``_clone``.

    Raises ``ValueError`` if ``deltareuse`` is not a known policy, if the
    destination is not empty, or if either revlog has filtered revisions.
    """
    if deltareuse not in self.DELTAREUSEALL:
        msg = _(b'value for deltareuse invalid: %s') % deltareuse
        raise ValueError(msg)

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # The destination's delta settings are overridden for the duration of
    # the copy only; remember them so they can be put back afterwards.
    previous_lazydelta = destrevlog._lazydelta
    previous_lazydeltabase = destrevlog._lazydeltabase
    previous_bothparents = destrevlog._deltabothparents

    try:
        # (policy, _lazydeltabase, _lazydelta): these two attributes
        # control whether a cached delta gets reused, if possible.
        # DELTAREUSEFULLADD is deliberately absent: it leaves both
        # settings untouched.
        lazy_policies = (
            (self.DELTAREUSEALWAYS, True, True),
            (self.DELTAREUSESAMEREVS, False, True),
            (self.DELTAREUSENEVER, False, False),
        )
        for policy, lazydeltabase, lazydelta in lazy_policies:
            if deltareuse == policy:
                destrevlog._lazydeltabase = lazydeltabase
                destrevlog._lazydelta = lazydelta
                break

        destrevlog._deltabothparents = (
            forcedeltabothparents or previous_bothparents
        )

        self._clone(
            tr,
            destrevlog,
            addrevisioncb,
            deltareuse,
            forcedeltabothparents,
            sidedata_helpers,
        )
    finally:
        destrevlog._lazydelta = previous_lazydelta
        destrevlog._lazydeltabase = previous_lazydeltabase
        destrevlog._deltabothparents = previous_bothparents

3067

3072

3068

def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedata_helpers,
):
    """perform the core duty of `revlog.clone` after parameter processing

    Each revision of this revlog is added to ``destrevlog`` in iteration
    order:

    - with ``DELTAREUSEFULLADD`` the revision data is passed to
      ``destrevlog.addrevision`` as if it were new content;
    - otherwise it is passed to ``destrevlog._addrevision``, handing over
      the stored delta as ``cachedelta`` when ``destrevlog._lazydelta``
      is set and the revision is stored as a delta.

    When ``sidedata_helpers`` is not None, the helpers are run for every
    revision and the flags they ask to add/remove are applied to the
    revision flags.

    ``addrevisioncb``, when truthy, is called as
    ``addrevisioncb(self, rev, node)`` after each revision is added.

    ``forcedeltabothparents`` is part of the signature but is not read
    here.
    """
    deltacomputer = deltautil.deltacomputer(destrevlog)
    index = self.index
    for rev in self:
        entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use raw entry from index.
        flags = entry[0] & 0xFFFF
        linkrev = entry[4]
        p1 = index[entry[5]][7]
        p2 = index[entry[6]][7]
        node = entry[7]

        # (Possibly) reuse the delta from the revlog if allowed and
        # the revlog chunk is a delta.
        cachedelta = None
        rawtext = None
        if deltareuse == self.DELTAREUSEFULLADD:
            text, sidedata = self._revisiondata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                # new_flags is (flags to add, flags to remove). The
                # parentheses matter: `&` binds tighter than `|`, so
                # without them the removal mask would only be applied
                # to the flags being added, never to the existing ones.
                flags = (flags | new_flags[0]) & ~new_flags[1]

            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            if destrevlog._lazydelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            # The raw text is only needed when no stored delta is handed
            # over; the sidedata is needed in both cases.
            sidedata = None
            if not cachedelta:
                rawtext, sidedata = self._revisiondata(rev)
            if sidedata is None:
                sidedata = self.sidedata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                # Same add-then-remove ordering as in the branch above.
                flags = (flags | new_flags[0]) & ~new_flags[1]

            with destrevlog._writing(tr):
                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

        if addrevisioncb:
            addrevisioncb(self, rev, node)

3150

3155

3151

def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace the content of ``censornode`` with a censor tombstone.

    The whole revlog is copied into a temporary revlog (postfix
    ``tmpcensored``). The revision for ``censornode`` is written there as
    the packed tombstone with the ``REVIDX_ISCENSORED`` flag, and every
    other revision is copied with its own flags. The temporary files are
    then renamed over this revlog's files and the index is reloaded.

    ``tr`` is the transaction used for the writes and the file backups.
    ``tombstone`` is stored under the ``censored`` metadata key.

    Raises ``error.RevlogError`` for version 0 revlogs, and
    ``error.Abort`` when the packed tombstone is longer than the censored
    data or when a censored revision is (or would be) stored as a delta.
    """
    fmt_version = self._format_version
    if fmt_version == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs') % fmt_version
        )

    target_rev = self.rev(censornode)
    packed_tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    if self.rawsize(target_rev) < len(packed_tombstone):
        raise error.Abort(
            _(b'censor tombstone must be no longer than censored data')
        )

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    replacement = revlog(
        self.opener,
        target=self.target,
        radix=self.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    # Make the replacement use the same on-disk format as this revlog.
    replacement._format_version = self._format_version
    replacement._format_flags = self._format_flags
    replacement._generaldelta = self._generaldelta
    replacement._parse_index = self._parse_index

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev != target_rev:
            # Plain copy of a revision that is not the one being censored.
            if not self.iscensored(rev):
                rawtext = self.rawdata(rev)
            elif self.deltaparent(rev) == nullrev:
                # Already-censored revision: copy the stored chunk.
                rawtext = self._chunk(rev)
            else:
                raise error.Abort(
                    _(
                        b'cannot censor due to censored '
                        b'revision having delta stored'
                    )
                )

            replacement.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )
            continue

        replacement.addrawrevision(
            packed_tombstone,
            tr,
            self.linkrev(target_rev),
            p1,
            p2,
            censornode,
            REVIDX_ISCENSORED,
        )

        # The tombstone must not have been stored as a delta.
        if replacement.deltaparent(rev) != nullrev:
            raise error.Abort(
                _(
                    b'censored revision stored as delta; '
                    b'cannot censor'
                ),
                hint=_(
                    b'censoring of revlogs is not '
                    b'fully implemented; please report '
                    b'this bug'
                ),
            )

    # Back up the current files before they get replaced.
    tr.addbackup(self._indexfile, location=b'store')
    if not self._inline:
        tr.addbackup(self._datafile, location=b'store')

    self.opener.rename(replacement._indexfile, self._indexfile)
    if not self._inline:
        self.opener.rename(replacement._datafile, self._datafile)

    # Drop everything cached from the old files and read the new index.
    self.clearcaches()
    self._loadindex()

3238

3243

3239

def verifyintegrity(self, state):
    """Check this revlog for problems.

    This is a generator: it yields a ``revlogproblem`` instance for each
    problem found.

    ``state`` is a dict shared with the caller. The keys
    ``expectedversion`` and ``erroroncensored`` (and optionally
    ``skipflags``) are read from it; ``skipread`` and ``safe_renamed``
    are (re)initialized to empty sets, and nodes whose content could not
    be checked are added to ``skipread``.
    """
    data_excess, index_excess = self.checksize()
    if data_excess:
        yield revlogproblem(
            error=_(b'data length off by %d bytes') % data_excess
        )
    if index_excess:
        yield revlogproblem(
            error=_(b'index contains %d extra bytes') % index_excess
        )

    version = self._format_version

    # The verifier tells us what version revlog we should be.
    if version != state[b'expectedversion']:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self.display_id, version, state[b'expectedversion'])
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta: file content starts with b'\1\n', the metadata
        #         header defined in filelog.py, but without a rename
        #   ext: content stored externally
        #
        # More formally, their differences are shown below:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #   flags()             | 0      | 0      | 0     | not 0
        #   renamed()           | False  | True   | False | ?
        #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "rawdata(rev)". "text"
        # mentioned below is "revision(rev)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        #              | common | rename | meta  | ext
        # -------------------------------------------------
        #    rawsize() | L1     | L1     | L1    | L1
        #       size() | L1     | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2     | L2     | L2    | L2
        #    len(text) | L2     | L2     | L2    | L3
        #  len(read()) | L2     | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            # Only keep the skip flags that this revision actually has.
            skipflags = state.get(b'skipflags', 0)
            if skipflags:
                skipflags &= self.flags(rev)

            _verify_revision(self, skipflags, state, node)

            indexed_size = self.rawsize(rev)
            unpacked_size = len(self.rawdata(node))

            if indexed_size != unpacked_size:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected')
                    % (unpacked_size, indexed_size),
                    node=node,
                )

        except error.CensoredNodeError:
            # Censored data is only a problem when the caller says so.
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
                state[b'skipread'].add(node)
        except Exception as exc:
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(exc)),
                node=node,
            )
            state[b'skipread'].add(node)

3343

3348

3344

def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Return a dict of storage details, one key per requested item.

    Each truthy argument adds the bytes key of the same name:

    ``exclusivefiles``
      list of ``(opener, path)`` pairs: the index file, plus the data
      file when the revlog is not inline.
    ``sharedfiles``
      always an empty list.
    ``revisionscount``
      ``len(self)``.
    ``trackedsize``
      sum of ``rawsize(rev)`` over every revision.
    ``storedsize``
      sum of the ``st_size`` of every path returned by ``self.files()``.
    """
    info = {}

    if exclusivefiles:
        owned = [(self.opener, self._indexfile)]
        if not self._inline:
            owned.append((self.opener, self._datafile))
        info[b'exclusivefiles'] = owned

    if sharedfiles:
        info[b'sharedfiles'] = []

    if revisionscount:
        info[b'revisionscount'] = len(self)

    if trackedsize:
        rawsize = self.rawsize
        info[b'trackedsize'] = sum(rawsize(rev) for rev in self)

    if storedsize:
        total = 0
        for path in self.files():
            total += self.opener.stat(path).st_size
        info[b'storedsize'] = total

    return info

3374

3379

3375

def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
    """Generate sidedata for revisions ``startrev`` to ``endrev`` inclusive.

    The sidedata helpers in ``helpers`` are run for each revision of the
    range; the resulting sidedata is serialized (and compressed when that
    makes it smaller), appended to the data file, and the matching index
    entries are rewritten in place with the new sidedata offset, length,
    flags and compression mode.

    Nothing is done when this revlog has no sidedata support, or when
    both ``helpers[1]`` and ``helpers[2]`` are empty (nothing to generate
    or remove).

    Raises ``error.Abort`` if a revision in the range already has
    sidedata: rewriting existing sidedata is not supported yet.
    """
    if not self.hassidedata:
        return
    # revlog formats with sidedata support does not support inline
    assert not self._inline
    if not helpers[1] and not helpers[2]:
        # Nothing to generate or remove
        return

    new_entries = []
    # append the new sidedata
    with self._writing(transaction):
        ifh, dfh = self._writinghandles
        if self._docket is not None:
            dfh.seek(self._docket.data_end, os.SEEK_SET)
        else:
            dfh.seek(0, os.SEEK_END)

        current_offset = dfh.tell()
        for rev in range(startrev, endrev + 1):
            entry = self.index[rev]
            new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                store=self,
                sidedata_helpers=helpers,
                sidedata={},
                rev=rev,
            )

            serialized_sidedata = sidedatautil.serialize_sidedata(
                new_sidedata
            )

            sidedata_compression_mode = COMP_MODE_INLINE
            if serialized_sidedata and self.hassidedata:
                sidedata_compression_mode = COMP_MODE_PLAIN
                h, comp_sidedata = self.compress(serialized_sidedata)
                # Slice (`[0:1]`) rather than index (`[0]`): on Python 3
                # indexing bytes yields an int, which never compares
                # equal to a bytes object, so the checks below would
                # silently stop working.
                comp_header = comp_sidedata[0:1]
                if (
                    h != b'u'
                    and comp_header != b'\0'
                    and len(comp_sidedata) < len(serialized_sidedata)
                ):
                    assert not h
                    if comp_header == self._docket.default_compression_header:
                        sidedata_compression_mode = COMP_MODE_DEFAULT
                    else:
                        sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
            if entry[8] != 0 or entry[9] != 0:
                # rewriting entries that already have sidedata is not
                # supported yet, because it introduces garbage data in the
                # revlog.
                msg = b"rewriting existing sidedata is not supported yet"
                raise error.Abort(msg)

            # Apply (potential) flags to add and to remove after running
            # the sidedata helpers. The parentheses matter: `&` binds
            # tighter than `|`, so without them the removal mask would
            # only apply to the flags being added, not to the entry's
            # existing ones. `~flags[1]` keeps every other bit of the
            # entry's first field intact.
            new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
            entry_update = (
                current_offset,
                len(serialized_sidedata),
                new_offset_flags,
                sidedata_compression_mode,
            )

            # the sidedata computation might have moved the file cursors
            # around
            dfh.seek(current_offset, os.SEEK_SET)
            dfh.write(serialized_sidedata)
            new_entries.append(entry_update)
            current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

        # rewrite the new index entries
        ifh.seek(startrev * self.index.entry_size)
        for i, e in enumerate(new_entries):
            rev = startrev + i
            self.index.replace_sidedata_info(rev, *e)
            packed = self.index.entry_binary(rev)
            if rev == 0 and self._docket is None:
                # Without a docket the first index entry is written
                # together with the packed format header.
                header = self._format_flags | self._format_version
                header = self.index.pack_header(header)
                packed = header + packed
            ifh.write(packed)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlog.py - storage back-end for mercurial
             #
             # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Storage back-end for Mercurial.
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches.
             """
             from __future__ import absolute_import
             import binascii
             import collections
             import contextlib
             import errno
             import io
             import os
             import struct
             import zlib
             # import stuff from node for others to import from revlog
             from .node import (
                 bin,
                 hex,
                 nullrev,
                 sha1nodeconstants,
                 short,
                 wdirrev,
             )
             from .i18n import _
             from .pycompat import getattr
             from .revlogutils.constants import (
                 ALL_KINDS,
                 CHANGELOGV2,
                 COMP_MODE_DEFAULT,
                 COMP_MODE_INLINE,
                 COMP_MODE_PLAIN,
                 FEATURES_BY_VERSION,
                 FLAG_GENERALDELTA,
                 FLAG_INLINE_DATA,
                 INDEX_HEADER,
                 KIND_CHANGELOG,
                 REVLOGV0,
                 REVLOGV1,
                 REVLOGV1_FLAGS,
                 REVLOGV2,
                 REVLOGV2_FLAGS,
                 REVLOG_DEFAULT_FLAGS,
                 REVLOG_DEFAULT_FORMAT,
                 REVLOG_DEFAULT_VERSION,
                 SUPPORTED_FLAGS,
             )
             from .revlogutils.flagutil import (
                 REVIDX_DEFAULT_FLAGS,
                 REVIDX_ELLIPSIS,
                 REVIDX_EXTSTORED,
                 REVIDX_FLAGS_ORDER,
                 REVIDX_HASCOPIESINFO,
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from .thirdparty import attr
             from . import (
                 ancestor,
                 dagop,
                 error,
                 mdiff,
                 policy,
                 pycompat,
                 templatefilters,
                 util,
             )
             from .interfaces import (
                 repository,
                 util as interfaceutil,
             )
             from .revlogutils import (
                 deltas as deltautil,
                 docket as docketutil,
                 flagutil,
                 nodemap as nodemaputil,
                 revlogv0,
                 sidedata as sidedatautil,
             )
             from .utils import (
                 storageutil,
                 stringutil,
             )
             # Bare references to the names imported above: they keep pyflakes from
             # reporting the imports as unused.  The names must stay available in this
             # module because extensions access them from here.
             REVLOGV0
             REVLOGV1
             REVLOGV2
             FLAG_INLINE_DATA
             FLAG_GENERALDELTA
             REVLOG_DEFAULT_FLAGS
             REVLOG_DEFAULT_FORMAT
             REVLOG_DEFAULT_VERSION
             REVLOGV1_FLAGS
             REVLOGV2_FLAGS
             REVIDX_ISCENSORED
             REVIDX_ELLIPSIS
             REVIDX_HASCOPIESINFO
             REVIDX_EXTSTORED
             REVIDX_DEFAULT_FLAGS
             REVIDX_FLAGS_ORDER
             REVIDX_RAWTEXT_CHANGING_FLAGS
             # Implementation modules resolved through the ``policy`` helpers.
             # NOTE(review): ``rustrevlog`` is compared against None further down, so
             # ``policy.importrust`` presumably returns None when no Rust build is
             # available -- confirm in the policy module.
             parsers = policy.importmod('parsers')
             rustancestor = policy.importrust('ancestor')
             rustdagop = policy.importrust('dagop')
             rustrevlog = policy.importrust('revlog')
             # Aliased for performance.
             _zlibdecompress = zlib.decompress
             # max size of revlog with inline data (131072 == 128 * 1024)
             _maxinline = 131072
             # 1048576 == 1024 * 1024; the users of this constant are outside this part
             # of the file.
             _chunksize = 1048576
             # Flag processors for REVIDX_ELLIPSIS.
             def ellipsisreadprocessor(rl, text):
                 """Read-side ellipsis processor: hand ``text`` back untouched.

                 ``rl`` is accepted but not used.  The result is the 2-tuple
                 ``(text, False)``.
                 """
                 outcome = (text, False)
                 return outcome
             def ellipsiswriteprocessor(rl, text):
                 """Write-side ellipsis processor: hand ``text`` back untouched.

                 ``rl`` is accepted but not used.  The result is the 2-tuple
                 ``(text, False)``.
                 """
                 outcome = (text, False)
                 return outcome
             def ellipsisrawprocessor(rl, text):
                 """Raw-side ellipsis processor: always answer ``False``.

                 Neither ``rl`` nor ``text`` is looked at.
                 """
                 outcome = False
                 return outcome
             # The three ellipsis processors bundled in (read, write, raw) order.  This
             # tuple is what gets registered for REVIDX_ELLIPSIS when the
             # ``enableellipsis`` opener option is set.
             ellipsisprocessor = (
                 ellipsisreadprocessor,
                 ellipsiswriteprocessor,
                 ellipsisrawprocessor,
             )
             def offset_type(offset, type):
                 """Combine ``offset`` and the flag bits ``type`` into a single integer.

                 The offset is shifted up by 16 bits and the flags are OR-ed into the
                 result.  Raises ValueError when ``type`` has a bit set that is not in
                 ``flagutil.REVIDX_KNOWN_FLAGS``.
                 """
                 unknown_bits = type & ~flagutil.REVIDX_KNOWN_FLAGS
                 if unknown_bits:
                     raise ValueError(b'unknown revlog index flags')
                 shifted_offset = int(offset) << 16
                 return int(shifted_offset | type)
             def _verify_revision(rl, skipflags, state, node):
                 """Verify the integrity of the given revlog ``node`` while providing a hook
                 point for extensions to influence the operation."""
                 if skipflags:
                     state[b'skipread'].add(node)
                 else:
                     # Side-effect: read content and verify hash.
                     rl.revision(node)
             # True if a fast implementation for persistent-nodemap is available.
             #
             # The "pure" Python implementation is also considered "fast": people using
             # pure do not really have performance considerations (and have a
             # wheelbarrow of other sources of slowness anyway).
             #
             # NOTE(review): the ``BaseIndexObject`` attribute check presumably
             # identifies the pure parsers module -- confirm against the parsers
             # implementations.
             HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
                 parsers, 'BaseIndexObject'
             )
             # Declared frozen and with slots: instances are immutable records.  The
             # field order below is also the positional constructor order.
             @attr.s(slots=True, frozen=True)
             class _revisioninfo(object):
                 """Information about a revision that allows building its fulltext

                 node:       expected hash of the revision
                 p1, p2:     parent revs of the revision
                 btext:      built text cache consisting of a one-element list
                 textlen:    presumably the length of the revision fulltext
                             (TODO confirm against the callers)
                 cachedelta: (baserev, uncompressed_delta) or None
                 flags:      flags associated to the revision storage

                 One of btext[0] or cachedelta must be set.
                 """
                 node = attr.ib()
                 p1 = attr.ib()
                 p2 = attr.ib()
                 btext = attr.ib()
                 textlen = attr.ib()
                 cachedelta = attr.ib()
                 flags = attr.ib()
             @interfaceutil.implementer(repository.irevisiondelta)
             @attr.s(slots=True)
             class revlogrevisiondelta(object):
                 """attrs record declared to implement ``repository.irevisiondelta``.

                 The meaning of each field is defined by that interface, which is not
                 visible from this module.  ``linknode`` is the only field with a
                 default (None); every other field must be supplied by the caller.
                 """
                 node = attr.ib()
                 p1node = attr.ib()
                 p2node = attr.ib()
                 basenode = attr.ib()
                 flags = attr.ib()
                 baserevisionsize = attr.ib()
                 revision = attr.ib()
                 delta = attr.ib()
                 sidedata = attr.ib()
                 protocol_flags = attr.ib()
                 linknode = attr.ib(default=None)
             @interfaceutil.implementer(repository.iverifyproblem)
             @attr.s(frozen=True)
             class revlogproblem(object):
                 """Frozen attrs record declared to implement ``repository.iverifyproblem``.

                 All three fields default to None.
                 NOTE(review): presumably ``warning`` or ``error`` carries the message
                 and ``node`` the revision concerned -- confirm against the interface.
                 """
                 warning = attr.ib(default=None)
                 error = attr.ib(default=None)
                 node = attr.ib(default=None)
             def parse_index_v1(data, inline):
                 """Parse index ``data`` with ``parsers.parse_index2``.

                 Returns the ``(index, cache)`` pair produced by the parser.
                 """
                 # delegate to the ``parsers`` module selected by the module policy
                 parsed_index, chunk_cache = parsers.parse_index2(data, inline)
                 return (parsed_index, chunk_cache)
             def parse_index_v2(data, inline):
                 """Parse index ``data`` with ``parsers.parse_index2`` in revlogv2 mode.

                 Returns the ``(index, cache)`` pair produced by the parser.
                 """
                 # same entry point as for v1, with the ``revlogv2`` switch turned on
                 parsed_index, chunk_cache = parsers.parse_index2(
                     data, inline, revlogv2=True
                 )
                 return (parsed_index, chunk_cache)
             def parse_index_cl_v2(data, inline):
                 """Parse index ``data`` for the changelog-v2 format.

                 ``inline`` must be false (enforced with an assertion).  Returns the
                 ``(index, cache)`` pair produced by the parser.
                 """
                 assert not inline
                 # The parser is imported from the pure-Python package here, whatever
                 # implementation ``parsers`` resolves to.
                 from .pure.parsers import parse_index_cl_v2 as _pure_parse_index_cl_v2
                 parsed_index, chunk_cache = _pure_parse_index_cl_v2(data)
                 return (parsed_index, chunk_cache)
             if not util.safehasattr(parsers, 'parse_index_devel_nodemap'):
                 # the loaded ``parsers`` module has no devel nodemap parser
                 parse_index_v1_nodemap = None
             else:
                 def parse_index_v1_nodemap(data, inline):
                     """Parse index ``data`` with ``parsers.parse_index_devel_nodemap``.

                     Returns the ``(index, cache)`` pair produced by the parser.
                     """
                     parsed_index, chunk_cache = parsers.parse_index_devel_nodemap(
                         data, inline
                     )
                     return (parsed_index, chunk_cache)
             def parse_index_v1_mixed(data, inline):
                 """Parse ``data`` as a v1 index and wrap it in ``rustrevlog.MixedIndex``.

                 Returns ``(wrapped_index, cache)``; the cache comes straight from
                 ``parse_index_v1``.
                 """
                 base_index, chunk_cache = parse_index_v1(data, inline)
                 mixed_index = rustrevlog.MixedIndex(base_index)
                 return mixed_index, chunk_cache
             # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
             # signed integer); 0x7FFFFFFF == 2**31 - 1
             _maxentrysize = 0x7FFFFFFF
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, including its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 If checkambig, indexfile is opened with checkambig=True at
                 writing, to avoid file stat ambiguity.
                 If mmaplargeindex is True, and an mmapindexthreshold is set, the
                 index will be mmapped rather than read if it is larger than the
                 configured threshold.
                 If censorable is True, the revlog can have censored revisions.
                 If `upperboundcomp` is not None, this is the expected maximal gain from
                 compression for the data content.
                 `concurrencychecker` is an optional function that receives 3 arguments: a
                 file handle, a filename, and an expected position. It should check whether
                 the current position in the file handle is valid, and log/warn/fail (by
                 raising).
                 Internal details
                 ----------------
                 A large part of the revlog logic deals with revisions' "index entries", tuple
                 objects that contain the same "items" whatever the revlog version.
                 Different versions will have different ways of storing these items (sometimes
                 not having them at all), but the tuple will always be the same. New fields
                 are usually added at the end to avoid breaking existing code that relies
                 on the existing order. The fields are defined as follows:
                 [0] offset:
                         The byte index of the start of revision data chunk.
                         That value is shifted up by 16 bits. use "offset = field >> 16" to
                         retrieve it.
                     flags:
                         A flag field that carries special information or changes the behavior
                         of the revision. (see `REVIDX_*` constants for details)
                         The flag field only occupies the first 16 bits of this field,
                         use "flags = field & 0xFFFF" to retrieve the value.
                 [1] compressed length:
                         The size, in bytes, of the chunk on disk
                 [2] uncompressed length:
                         The size, in bytes, of the full revision once reconstructed.
                 [3] base rev:
                         Either the base of the revision delta chain (without general
                         delta), or the base of the delta (stored in the data chunk)
                         with general delta.
                 [4] link rev:
                         Changelog revision number of the changeset introducing this
                         revision.
                 [5] parent 1 rev:
                         Revision number of the first parent
                 [6] parent 2 rev:
                         Revision number of the second parent
                 [7] node id:
                         The node id of the current revision
                 [8] sidedata offset:
                         The byte index of the start of the revision's side-data chunk.
                 [9] sidedata chunk length:
                         The size, in bytes, of the revision's side-data chunk.
                 [10] data compression mode:
                         two bits that detail the way the data chunk is compressed on disk.
                         (see "COMP_MODE_*" constants for details). For revlog version 0 and
                         1 this will always be COMP_MODE_INLINE.
                 [11] side-data compression mode:
                         two bits that detail the way the sidedata chunk is compressed on disk.
                         (see "COMP_MODE_*" constants for details)
                 """
                 _flagserrorclass = error.RevlogError
                 def __init__(
                     self,
                     opener,
                     target,
                     radix,
                     postfix=None,  # only exists for `tmpcensored` now
                     checkambig=False,
                     mmaplargeindex=False,
                     censorable=False,
                     upperboundcomp=None,
                     persistentnodemap=False,
                     concurrencychecker=None,
                     trypending=False,
                 ):
                     """Create a revlog object and load its index.

                     `opener` is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.

                     `target`: a (KIND, ID) tuple that identifies the content stored in
                     this revlog. It helps the rest of the code to understand what the
                     revlog is about without having to resort to heuristics and index
                     filename analysis. Note that this must reliably be set by normal
                     code, but that test, debug, or performance measurement code might
                     not set this to an accurate value.

                     `radix`: base from which the file names are built (`<radix>.i`,
                     `<radix>.d`).

                     `postfix`: when not None, it is appended to the index and data
                     file names.

                     `checkambig`, `mmaplargeindex`, `censorable`, `upperboundcomp` and
                     `concurrencychecker` are described in the class docstring.

                     `persistentnodemap`: when true, a nodemap file is looked up through
                     `nodemaputil.get_nodemap_file`.

                     `trypending`: when true (and `postfix` is None), `_loadindex` uses
                     the `<radix>.i.a` entry point if that file exists.
                     """
                     self.upperboundcomp = upperboundcomp
                     self.radix = radix
                     # File names; the real values are filled in by _loadindex().
                     self._docket_file = None
                     self._indexfile = None
                     self._datafile = None
                     self._nodemap_file = None
                     self.postfix = postfix
                     self._trypending = trypending
                     self.opener = opener
                     if persistentnodemap:
                         self._nodemap_file = nodemaputil.get_nodemap_file(self)
                     assert target[0] in ALL_KINDS
                     assert len(target) == 2
                     self.target = target
                     #  When True, indexfile is opened with checkambig=True at writing, to
                     #  avoid file stat ambiguity.
                     self._checkambig = checkambig
                     self._mmaplargeindex = mmaplargeindex
                     self._censorable = censorable
                     # 3-tuple of (node, rev, text) for a raw revision.
                     self._revisioncache = None
                     # Maps rev to chain base rev.
                     self._chainbasecache = util.lrucachedict(100)
                     # 2-tuple of (offset, data) of raw data from the revlog at an offset.
                     self._chunkcache = (0, b'')
                     # How much data to read and cache into the raw revlog data cache.
                     self._chunkcachesize = 65536
                     self._maxchainlen = None
                     self._deltabothparents = True
                     self.index = None
                     self._docket = None
                     self._nodemap_docket = None
                     # Mapping of partial identifiers to full nodes.
                     self._pcache = {}
                     # Defaults for the compression and delta/sparse-read settings; the
                     # opener options processed in _init_opts() may override them.
                     self._compengine = b'zlib'
                     self._compengineopts = {}
                     self._maxdeltachainspan = -1
                     self._withsparseread = False
                     self._sparserevlog = False
                     self.hassidedata = False
                     self._srdensitythreshold = 0.50
                     self._srmingapsize = 262144
                     # Make copy of flag processors so each revlog instance can support
                     # custom flags.
                     self._flagprocessors = dict(flagutil.flagprocessors)
                     # 2-tuple of file handles being used for active writing.
                     self._writinghandles = None
                     # prevent nesting of addgroup
                     self._adding_group = None
                     # Everything _loadindex() reads must be set above this call; the
                     # concurrency checker is only stored once loading is done.
                     self._loadindex()
                     self._concurrencychecker = concurrencychecker
                 def _init_opts(self):
                     """Apply the opener options to this revlog's default settings.

                     The values set here might still be affected by what is actually
                     read from disk.  Three values are returned for use in _loadindex():

                     * new_header:
                         version header to use if a new revlog has to be created
                     * mmapindexthreshold:
                         minimal index size from which mmap is used; stays None unless
                         `mmaplargeindex` was requested and the option is present
                     * force_nodemap:
                         force the usage of a "development" version of the nodemap code

                     Raises error.RevlogError when the chunk cache size is not a
                     strictly positive power of 2.
                     """
                     opts = self.opener.options
                     # header a brand new revlog would be created with
                     if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
                         new_header = CHANGELOGV2
                     elif b'revlogv2' in opts:
                         new_header = REVLOGV2
                     elif b'revlogv1' in opts:
                         if b'generaldelta' in opts:
                             new_header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
                         else:
                             new_header = REVLOGV1 | FLAG_INLINE_DATA
                     elif b'revlogv0' in opts:
                         new_header = REVLOGV0
                     else:
                         new_header = REVLOG_DEFAULT_VERSION
                     if b'chunkcachesize' in opts:
                         self._chunkcachesize = opts[b'chunkcachesize']
                     if b'maxchainlen' in opts:
                         self._maxchainlen = opts[b'maxchainlen']
                     if b'deltabothparents' in opts:
                         self._deltabothparents = opts[b'deltabothparents']
                     # lazydeltabase is only honoured when lazydelta itself is on
                     lazy_delta = bool(opts.get(b'lazydelta', True))
                     self._lazydelta = lazy_delta
                     if lazy_delta:
                         self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
                     else:
                         self._lazydeltabase = False
                     if b'compengine' in opts:
                         self._compengine = opts[b'compengine']
                     for level_key in (b'zlib.level', b'zstd.level'):
                         if level_key in opts:
                             self._compengineopts[level_key] = opts[level_key]
                     if b'maxdeltachainspan' in opts:
                         self._maxdeltachainspan = opts[b'maxdeltachainspan']
                     mmapindexthreshold = None
                     if self._mmaplargeindex and b'mmapindexthreshold' in opts:
                         mmapindexthreshold = opts[b'mmapindexthreshold']
                     sparse_revlog = bool(opts.get(b'sparse-revlog', False))
                     sparse_read = bool(opts.get(b'with-sparse-read', False))
                     self._sparserevlog = sparse_revlog
                     # sparse-revlog forces sparse-read
                     self._withsparseread = sparse_revlog or sparse_read
                     if b'sparse-read-density-threshold' in opts:
                         self._srdensitythreshold = opts[b'sparse-read-density-threshold']
                     if b'sparse-read-min-gap-size' in opts:
                         self._srmingapsize = opts[b'sparse-read-min-gap-size']
                     if opts.get(b'enableellipsis'):
                         self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
                     # revlog v0 doesn't have flag processors
                     extra_processors = opts.get(b'flagprocessors', {})
                     for flag, processor in pycompat.iteritems(extra_processors):
                         flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
                     # the chunk cache size has to be a strictly positive power of 2
                     cache_size = self._chunkcachesize
                     if cache_size <= 0:
                         raise error.RevlogError(
                             _(b'revlog chunk cache size %r is not greater than 0')
                             % cache_size
                         )
                     if cache_size & (cache_size - 1):
                         raise error.RevlogError(
                             _(b'revlog chunk cache size %r is not a power of 2')
                             % cache_size
                         )
                     force_nodemap = opts.get(b'devel-force-nodemap', False)
                     return (new_header, mmapindexthreshold, force_nodemap)
                 def _get_data(self, filepath, mmap_threshold, size=None):
                     """Return the content of ``filepath``, either read or mmapped.

                     The file is mmapped when ``mmap_threshold`` is not None and the
                     file is at least that large; otherwise it is read normally.  When
                     ``size`` is given, at most that many bytes are returned.

                     A missing file yields the empty bytes string; any other IOError
                     is propagated.
                     """
                     try:
                         with self.opener(filepath) as fp:
                             use_mmap = False
                             if mmap_threshold is not None:
                                 file_size = self.opener.fstat(fp).st_size
                                 use_mmap = file_size >= mmap_threshold
                             if use_mmap:
                                 # TODO: should .close() to release resources without
                                 # relying on Python GC
                                 if size is None:
                                     return util.buffer(util.mmapread(fp))
                                 # avoid potential mmap crash: never ask for more than
                                 # the file holds
                                 return util.buffer(
                                     util.mmapread(fp, min(file_size, size))
                                 )
                             if size is None:
                                 return fp.read()
                             return fp.read(size)
                     except IOError as inst:
                         if inst.errno == errno.ENOENT:
                             return b''
                         raise
                 def _loadindex(self):
                     """Read the revlog entry point from disk and set up the index.

                     Steps, in order: process the opener options, pick the entry file
                     name, read the header (or fall back to the header computed from
                     the options when the entry file is empty or missing), validate
                     version and flags, load the docket for the versions that use
                     one, select the index parser and parse the index data.

                     Raises error.RevlogError for an unknown version, unknown flags,
                     index data shorter than what the docket announces, or when
                     parsing raises ValueError/IndexError.
                     """
                     new_header, mmapindexthreshold, force_nodemap = self._init_opts()
                     # Entry point: an explicit postfix wins, then the pending file
                     # (`.i.a`) when requested and present, then the plain `.i` file.
                     if self.postfix is not None:
                         entry_point = b'%s.i.%s' % (self.radix, self.postfix)
                     elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
                         entry_point = b'%s.i.a' % self.radix
                     else:
                         entry_point = b'%s.i' % self.radix
                     entry_data = b''
                     self._initempty = True
                     entry_data = self._get_data(entry_point, mmapindexthreshold)
                     if len(entry_data) > 0:
                         # the header is unpacked from the first 4 bytes of the file
                         header = INDEX_HEADER.unpack(entry_data[:4])[0]
                         self._initempty = False
                     else:
                         # nothing on disk: use the header derived from the options
                         header = new_header
                     # low 16 bits: format version; the remaining bits: format flags
                     self._format_flags = header & ~0xFFFF
                     self._format_version = header & 0xFFFF
                     supported_flags = SUPPORTED_FLAGS.get(self._format_version)
                     if supported_flags is None:
                         msg = _(b'unknown version (%d) in revlog %s')
                         msg %= (self._format_version, self.display_id)
                         raise error.RevlogError(msg)
                     elif self._format_flags & ~supported_flags:
                         msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                         display_flag = self._format_flags >> 16
                         msg %= (display_flag, self._format_version, self.display_id)
                         raise error.RevlogError(msg)
                     features = FEATURES_BY_VERSION[self._format_version]
                     self._inline = features[b'inline'](self._format_flags)
                     self._generaldelta = features[b'generaldelta'](self._format_flags)
                     self.hassidedata = features[b'sidedata']
                     if not features[b'docket']:
                         # no docket: the entry point is the index file itself
                         self._indexfile = entry_point
                         index_data = entry_data
                     else:
                         # the entry point is a docket that names the index file and
                         # records how much of it is valid (`index_end`)
                         self._docket_file = entry_point
                         if self._initempty:
                             self._docket = docketutil.default_docket(self, header)
                         else:
                             self._docket = docketutil.parse_docket(
                                 self, entry_data, use_pending=self._trypending
                             )
                         self._indexfile = self._docket.index_filepath()
                         index_data = b''
                         index_size = self._docket.index_end
                         if index_size > 0:
                             index_data = self._get_data(
                                 self._indexfile, mmapindexthreshold, size=index_size
                             )
                             if len(index_data) < index_size:
                                 msg = _(b'too few index data for %s: got %d, expected %d')
                                 msg %= (self.display_id, len(index_data), index_size)
                                 raise error.RevlogError(msg)
                         self._inline = False
                         # generaldelta implied by version 2 revlogs.
                         self._generaldelta = True
                         # the logic for persistent nodemap will be dealt with within the
                         # main docket, so disable it for now.
                         self._nodemap_file = None
                     if self.postfix is None:
                         self._datafile = b'%s.d' % self.radix
                     else:
                         self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
                     self.nodeconstants = sha1nodeconstants
                     self.nullid = self.nodeconstants.nullid
                     # sparse-revlog can't be on without general-delta (issue6056)
                     if not self._generaldelta:
                         self._sparserevlog = False
                     self._storedeltachains = True
                     # the devel nodemap parser needs a nodemap file, the
                     # `devel-force-nodemap` option and a parser that provides it
                     devel_nodemap = (
                         self._nodemap_file
                         and force_nodemap
                         and parse_index_v1_nodemap is not None
                     )
                     use_rust_index = False
                     if rustrevlog is not None:
                         if self._nodemap_file is not None:
                             use_rust_index = True
                         else:
                             use_rust_index = self.opener.options.get(b'rust.index')
                     # Parser selection: the format version decides first; the devel
                     # nodemap and rust variants only apply to the remaining case.
                     self._parse_index = parse_index_v1
                     if self._format_version == REVLOGV0:
                         self._parse_index = revlogv0.parse_index_v0
                     elif self._format_version == REVLOGV2:
                         self._parse_index = parse_index_v2
                     elif self._format_version == CHANGELOGV2:
                         self._parse_index = parse_index_cl_v2
                     elif devel_nodemap:
                         self._parse_index = parse_index_v1_nodemap
                     elif use_rust_index:
                         self._parse_index = parse_index_v1_mixed
                     try:
                         d = self._parse_index(index_data, self._inline)
                         index, _chunkcache = d
                         use_nodemap = (
                             not self._inline
                             and self._nodemap_file is not None
                             and util.safehasattr(index, 'update_nodemap_data')
                         )
                         if use_nodemap:
                             nodemap_data = nodemaputil.persisted_data(self)
                             if nodemap_data is not None:
                                 docket = nodemap_data[0]
                                 # Only use the persisted nodemap when its recorded tip
                                 # still matches the index (entry item [7] is the node
                                 # id).
                                 if (
                                     len(d[0]) > docket.tip_rev
                                     and d[0][docket.tip_rev][7] == docket.tip_node
                                 ):
                                     # no changelog tampering
                                     self._nodemap_docket = docket
                                     index.update_nodemap_data(*nodemap_data)
                     except (ValueError, IndexError):
                         raise error.RevlogError(
                             _(b"index %s is corrupted") % self.display_id
                         )
                     self.index, self._chunkcache = d
                     # the parser returned no usable chunk cache: reset it
                     if not self._chunkcache:
                         self._chunkclear()
                     # revnum -> (chain-length, sum-delta-length)
                     self._chaininfocache = util.lrucachedict(500)
                     # revlog header -> revlog compressor
                     self._decompressors = {}
                 @util.propertycache
                 def revlog_kind(self):
                     """The KIND half of the ``(KIND, ID)`` target tuple."""
                     kind = self.target[0]
                     return kind
                 @util.propertycache
                 def display_id(self):
                     """Public-facing identifier of this revlog, for use in messages."""
                     # The radix is used as is for now; building a user facing
                     # representation of `revlog.target` might be a better fit.
                     identifier = self.radix
                     return identifier
                 def _get_decompressor(self, t):
                     try:
                         compressor = self._decompressors[t]
                     except KeyError:
                         try:
                             engine = util.compengines.forrevlogheader(t)
                             compressor = engine.revlogcompressor(self._compengineopts)
                             self._decompressors[t] = compressor
                         except KeyError:
                             raise error.RevlogError(
                                 _(b'unknown compression type %s') % binascii.hexlify(t)
                             )
                     return compressor
                 @util.propertycache
                 def _compressor(self):
                     """revlog compressor built from the configured compression engine"""
                     return util.compengines[self._compengine].revlogcompressor(
                         self._compengineopts
                     )
                 @util.propertycache
                 def _decompressor(self):
                     """the default decompressor

                     This is the ``decompress`` callable matching the docket's default
                     compression header, or None when the revlog has no docket.
                     """
                     docket = self._docket
                     if docket is None:
                         return None
                     header = docket.default_compression_header
                     return self._get_decompressor(header).decompress
                 def _indexfp(self):
                     """file object for the revlog's index file"""
                     return self.opener(self._indexfile, mode=b"r")
                 def __index_write_fp(self):
                     # Not meant to be called directly: go through `_writing` instead.
                     #
                     # The index is opened for update and positioned where the next
                     # write must happen: the end of the file when there is no docket,
                     # the docket's `index_end` otherwise. A missing file (ENOENT) is
                     # created instead.
                     try:
                         fp = self.opener(
                             self._indexfile, mode=b"r+", checkambig=self._checkambig
                         )
                         docket = self._docket
                         if docket is not None:
                             fp.seek(docket.index_end, os.SEEK_SET)
                         else:
                             fp.seek(0, os.SEEK_END)
                         return fp
                     except IOError as err:
                         if err.errno == errno.ENOENT:
                             return self.opener(
                                 self._indexfile, mode=b"w+", checkambig=self._checkambig
                             )
                         raise
                 def __index_new_fp(self):
                     # Reserved for the upgrade away from an inline revlog; do not use
                     # it for anything else. The index is opened for writing as an
                     # atomic temporary file.
                     open_ = self.opener
                     return open_(
                         self._indexfile,
                         mode=b"w",
                         checkambig=self._checkambig,
                         atomictemp=True,
                     )
                 def _datafp(self, mode=b'r'):
                     """file object for the revlog's data file"""
                     return self.opener(self._datafile, mode=mode)
                 @contextlib.contextmanager
                 def _datareadfp(self, existingfp=None):
                     """file object suitable to read data"""
                     # Use explicit file handle, if given.
                     if existingfp is not None:
                         yield existingfp
                     # Use a file handle being actively used for writes, if available.
                     # There is some danger to doing this because reads will seek the
                     # file. However, _writeentry() performs a SEEK_END before all writes,
                     # so we should be safe.
                     elif self._writinghandles:
                         if self._inline:
                             yield self._writinghandles[0]
                         else:
                             yield self._writinghandles[1]
                     # Otherwise open a new file handle.
                     else:
                         if self._inline:
                             func = self._indexfp
                         else:
                             func = self._datafp
                         with func() as fp:
                             yield fp
                 def tiprev(self):
                     """return the revision number of the last index entry"""
                     count = len(self.index)
                     return count - 1
                 def tip(self):
                     """return the node of the ``tiprev()`` revision"""
                     last = self.tiprev()
                     return self.node(last)
                 def __contains__(self, rev):
                     return 0 <= rev < len(self)
                 def __len__(self):
                     return len(self.index)
                 def __iter__(self):
                     """iterate over the revision numbers, from 0 up to ``len(self) - 1``"""
                     count = len(self)
                     return iter(pycompat.xrange(count))
                 def revs(self, start=0, stop=None):
                     """iterate over the revisions of this revlog, from start to stop

                     The work is delegated to ``storageutil.iterrevs``, which receives
                     the current length of the revlog.
                     """
                     count = len(self)
                     return storageutil.iterrevs(count, start=start, stop=stop)
                 @property
                 def nodemap(self):
                     """deprecated accessor for ``self.index.nodemap``

                     A deprecation warning is issued on every access.
                     """
                     util.nouideprecwarn(
                         b"revlog.nodemap is deprecated, "
                         b"use revlog.index.[has_node|rev|get_rev]",
                         b'5.3',
                         stacklevel=2,
                     )
                     return self.index.nodemap
                 @property
                 def _nodecache(self):
                     """deprecated accessor for ``self.index.nodemap``

                     A deprecation warning is issued on every access.
                     """
                     util.nouideprecwarn(
                         b"revlog._nodecache is deprecated, use revlog.index.nodemap",
                         b'5.3',
                         stacklevel=2,
                     )
                     return self.index.nodemap
                 def hasnode(self, node):
                     """tell whether ``self.rev(node)`` succeeds

                     A ``KeyError`` from the lookup is reported as False; any other
                     exception propagates.
                     """
                     try:
                         self.rev(node)
                     except KeyError:
                         return False
                     else:
                         return True
                 def candelta(self, baserev, rev):
                     """whether two revisions (baserev, rev) can be delta-ed or not"""
                     # A content-changing flag processor (ex. LFS) can alter the rawtext
                     # a delta would be based on. Two clients could then hold the same
                     # revlog node with different flags (i.e. different rawtext
                     # contents), making the delta incompatible. Refuse to delta as
                     # soon as one side carries such a flag.
                     for candidate in (baserev, rev):
                         if self.flags(candidate) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                             return False
                     return True
                 def update_caches(self, transaction):
                     """refresh the persistent nodemap, if this revlog uses one

                     With a transaction the update is set up through
                     ``nodemaputil.setup_persistent_nodemap``; without one
                     ``nodemaputil.update_persistent_nodemap`` is called directly.
                     """
                     if self._nodemap_file is None:
                         return
                     if transaction is not None:
                         nodemaputil.setup_persistent_nodemap(transaction, self)
                     else:
                         nodemaputil.update_persistent_nodemap(self)
                 def clearcaches(self):
                     """reset the in-memory caches of the revlog and of its index

                     When a persistent nodemap is usable (non-inline revlog, a nodemap
                     file configured and an index exposing ``update_nodemap_data``), the
                     persisted nodemap data is loaded again afterwards.
                     """
                     self._revisioncache = None
                     self._chainbasecache.clear()
                     self._chunkcache = (0, b'')
                     self._pcache = {}
                     self._nodemap_docket = None
                     self.index.clearcaches()
                     # The python code is the one responsible for validating the docket, we
                     # end up having to refresh it here.
                     if self._inline or self._nodemap_file is None:
                         return
                     if not util.safehasattr(self.index, 'update_nodemap_data'):
                         return
                     persisted = nodemaputil.persisted_data(self)
                     if persisted is None:
                         return
                     self._nodemap_docket = persisted[0]
                     self.index.update_nodemap_data(*persisted)
                 def rev(self, node):
                     """return the revision number of ``node``

                     ``error.WdirUnsupported`` is raised for the working directory
                     pseudo nodes and ``error.LookupError`` for any other node the index
                     does not know about.
                     """
                     try:
                         return self.index.rev(node)
                     except TypeError:
                         raise
                     except error.RevlogError:
                         # parsers.c radix tree lookup failed
                         constants = self.nodeconstants
                         is_wdir = (
                             node == constants.wdirid
                             or node in constants.wdirfilenodeids
                         )
                         if not is_wdir:
                             raise error.LookupError(
                                 node, self.display_id, _(b'no node')
                             )
                         raise error.WdirUnsupported
                 # Accessors for index entries.
                 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
                 # are flags.
                 def start(self, rev):
                     """return the offset stored in the first index field of ``rev``"""
                     # the 16 low bits hold the flags, the offset sits above them
                     packed = self.index[rev][0]
                     return int(packed >> 16)
                 def flags(self, rev):
                     """return the flags of ``rev``: the 16 low bits of its first index field"""
                     entry = self.index[rev]
                     return entry[0] & 0xFFFF
                 def length(self, rev):
                     """return the second index field of ``rev``

                     ``end()`` adds this value to ``start()``, i.e. it is the size of the
                     stored chunk.
                     """
                     entry = self.index[rev]
                     return entry[1]
                 def sidedata_length(self, rev):
                     """return index field 9 of ``rev``, or 0 when sidedata is not in use"""
                     if self.hassidedata:
                         return self.index[rev][9]
                     return 0
                 def rawsize(self, rev):
                     """return the length of the uncompressed text for a given revision"""
                     size = self.index[rev][2]
                     if size < 0:
                         # no usable size in the index: measure the raw data itself
                         size = len(self.rawdata(rev))
                     return size
                 def size(self, rev):
                     """length of non-raw text (processed by a "read" flag processor)"""
                     flags = self.flags(rev)
                     # Known flags whose "read" processor could change the content.
                     # note: ELLIPSIS is known to not change the content.
                     content_changing = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
                     if flags & content_changing:
                         return len(self.revision(rev, raw=False))
                     # fast path: nothing can alter the content, size is rawsize
                     return self.rawsize(rev)
                 def chainbase(self, rev):
                     """return the revision at the base of the delta chain of ``rev``

                     The base field (index field 3) is followed until an entry that is
                     its own base is found. Results are memoized in
                     ``self._chainbasecache``.
                     """
                     cache = self._chainbasecache
                     cached = cache.get(rev)
                     if cached is not None:
                         return cached
                     index = self.index
                     current = rev
                     while True:
                         base = index[current][3]
                         if base == current:
                             break
                         current = base
                     cache[rev] = base
                     return base
                 def linkrev(self, rev):
                     """return the linkrev of ``rev`` (index field 4)"""
                     entry = self.index[rev]
                     return entry[4]
                 def parentrevs(self, rev):
                     """return the parent revisions of ``rev`` as a 2-tuple

                     When the first stored parent is nullrev, the two parents are
                     returned in swapped order. ``error.WdirUnsupported`` is raised
                     when ``rev`` is the working directory revision and is missing from
                     the index.
                     """
                     try:
                         entry = self.index[rev]
                     except IndexError:
                         if rev != wdirrev:
                             raise
                         raise error.WdirUnsupported
                     first, second = entry[5], entry[6]
                     if first == nullrev:
                         return second, first
                     return first, second
                 # fast parentrevs(rev) where rev isn't filtered
                 #
                 # This is a plain alias of the `parentrevs` implementation defined
                 # above; it is the parent function handed to the ancestor/head
                 # computations of this class.
                 _uncheckedparentrevs = parentrevs
                 def node(self, rev):
                     """return the node of ``rev`` (index field 7)

                     ``error.WdirUnsupported`` is raised when the lookup fails for the
                     working directory revision; other ``IndexError`` propagate.
                     """
                     try:
                         entry = self.index[rev]
                         return entry[7]
                     except IndexError:
                         if rev != wdirrev:
                             raise
                         raise error.WdirUnsupported
                 # Derived from index values.
                 def end(self, rev):
                     """return ``start(rev) + length(rev)``"""
                     begin = self.start(rev)
                     return begin + self.length(rev)
                 def parents(self, node):
                     """return the parent nodes of ``node`` as a ``(p1, p2)`` pair

                     The ordering mirrors ``parentrevs``: when the first stored parent
                     is null, the second stored parent is returned first.
                     """
                     index = self.index
                     entry = index[self.rev(node)]
                     # inline node() to avoid function call overhead
                     p1 = index[entry[5]][7]
                     p2 = index[entry[6]][7]
                     # entry[5] is a revision number, so comparing it with the
                     # `nullid` *node* could never match and the swap never happened.
                     # Compare the resolved parent node instead (this presumes, as the
                     # lookups above already do, that a null parent revision resolves
                     # to an entry carrying `nullid`).
                     if p1 == self.nullid:
                         return p2, p1
                     return p1, p2
                 def chainlen(self, rev):
                     """return the first item of ``_chaininfo(rev)``: the chain length"""
                     info = self._chaininfo(rev)
                     return info[0]
                 def _chaininfo(self, rev):
                     chaininfocache = self._chaininfocache
                     if rev in chaininfocache:
                         return chaininfocache[rev]
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     clen = 0
                     compresseddeltalen = 0
                     while iterrev != e[3]:
                         clen += 1
                         compresseddeltalen += e[1]
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         if iterrev in chaininfocache:
                             t = chaininfocache[iterrev]
                             clen += t[0]
                             compresseddeltalen += t[1]
                             break
                         e = index[iterrev]
                     else:
                         # Add text length of base since decompressing that also takes
                         # work. For cache hits the length is already included.
                         compresseddeltalen += e[1]
                     r = (clen, compresseddeltalen)
                     chaininfocache[rev] = r
                     return r
                 def _deltachain(self, rev, stoprev=None):
                     """Obtain the delta chain for a revision.
                     ``stoprev`` specifies a revision to stop at. If not specified, we
                     stop at the base of the chain.
                     Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
                     revs in ascending order and ``stopped`` is a bool indicating whether
                     ``stoprev`` was hit.
                     """
                     # Try C implementation.
                     try:
                         return self.index.deltachain(rev, stoprev, self._generaldelta)
                     except AttributeError:
                         pass
                     chain = []
                     # Alias to prevent attribute lookup in tight loop.
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     while iterrev != e[3] and iterrev != stoprev:
                         chain.append(iterrev)
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         e = index[iterrev]
                     if iterrev == stoprev:
                         stopped = True
                     else:
                         chain.append(iterrev)
                         stopped = False
                     chain.reverse()
                     return chain, stopped
                 def ancestors(self, revs, stoprev=0, inclusive=False):
                     """Generate the ancestors of 'revs' in reverse revision order.

                     Does not generate revs lower than stoprev.

                     See the documentation for ancestor.lazyancestors for more details."""
                     revs = list(revs)
                     # first, make sure start revisions aren't filtered: looking up
                     # the node of each of them is expected to raise if one is
                     for r in revs:
                         self.node(r)
                     # and we're sure ancestors aren't filtered as well
                     use_rust = rustancestor is not None and self.index.rust_ext_compat
                     if use_rust:
                         return rustancestor.LazyAncestors(
                             self.index, revs, stoprev=stoprev, inclusive=inclusive
                         )
                     return ancestor.lazyancestors(
                         self._uncheckedparentrevs,
                         revs,
                         stoprev=stoprev,
                         inclusive=inclusive,
                     )
                 def descendants(self, revs):
                     """return the descendants of ``revs`` as computed by ``dagop.descendantrevs``"""
                     revsfn = self.revs
                     parentsfn = self.parentrevs
                     return dagop.descendantrevs(revs, revsfn, parentsfn)
                 def findcommonmissing(self, common=None, heads=None):
                     """Return a tuple of the ancestors of common and the ancestors of heads
                     that are not ancestors of common. In revset terminology, we return the
                     tuple:

                       ::common, (::heads) - (::common)

                     The first item is a set-like object of revision numbers, the second
                     one a list of nodes sorted by revision number, meaning it is
                     topologically sorted.

                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [self.nullid]
                     if heads is None:
                         heads = self.heads()

                     commonrevs = [self.rev(n) for n in common]
                     headrevs = [self.rev(n) for n in heads]

                     # we want the ancestors, but inclusive
                     class lazyset(object):
                         """union of an eagerly filled set and a lazily computed collection"""

                         def __init__(self, lazyvalues):
                             self.addedvalues = set()
                             self.lazyvalues = lazyvalues

                         def __contains__(self, value):
                             return value in self.addedvalues or value in self.lazyvalues

                         def __iter__(self):
                             added = self.addedvalues
                             for r in added:
                                 yield r
                             for r in self.lazyvalues:
                                 if r not in added:
                                     yield r

                         def add(self, value):
                             self.addedvalues.add(value)

                         def update(self, values):
                             self.addedvalues.update(values)

                     has = lazyset(self.ancestors(commonrevs))
                     has.add(nullrev)
                     has.update(commonrevs)

                     # take all ancestors from heads that aren't in has
                     missing = set()
                     pending = collections.deque(r for r in headrevs if r not in has)
                     while pending:
                         r = pending.popleft()
                         if r in missing:
                             continue
                         missing.add(r)
                         pending.extend(p for p in self.parentrevs(r) if p not in has)
                     return has, [self.node(r) for r in sorted(missing)]
                 def incrementalmissingrevs(self, common=None):
                     """Return an object that can be used to incrementally compute the
                     revision numbers of the ancestors of arbitrary sets that are not
                     ancestors of common.

                     This is a ``rustancestor.MissingAncestors`` object when the Rust
                     extension is available and compatible with the index, an
                     ``ancestor.incrementalmissingancestors`` object otherwise.

                     'common' is a list of revision numbers. If common is not supplied, uses
                     nullrev.
                     """
                     if common is None:
                         common = [nullrev]
                     use_rust = rustancestor is not None and self.index.rust_ext_compat
                     if not use_rust:
                         return ancestor.incrementalmissingancestors(
                             self.parentrevs, common
                         )
                     return rustancestor.MissingAncestors(self.index, common)
                 def findmissingrevs(self, common=None, heads=None):
                     """Return the revision numbers of the ancestors of heads that
                     are not ancestors of common.

                     More specifically, return a list of revision numbers corresponding to
                     nodes N such that every N satisfies the following constraints:

                       1. N is an ancestor of some node in 'heads'
                       2. N is not an ancestor of any node in 'common'

                     The result is whatever ``missingancestors`` of the object built by
                     ``incrementalmissingrevs`` returns for 'heads'.

                     'heads' and 'common' are both lists of revision numbers.  If heads is
                     not supplied, uses all of the revlog's head revisions.  If common is
                     not supplied, uses nullrev."""
                     if common is None:
                         common = [nullrev]
                     if heads is None:
                         heads = self.headrevs()
                     return self.incrementalmissingrevs(common=common).missingancestors(
                         heads
                     )
                 def findmissing(self, common=None, heads=None):
                     """Return the ancestors of heads that are not ancestors of common.

                     More specifically, return a list of nodes N such that every N
                     satisfies the following constraints:

                       1. N is an ancestor of some node in 'heads'
                       2. N is not an ancestor of any node in 'common'

                     The nodes come out in the order ``missingancestors`` of the object
                     built by ``incrementalmissingrevs`` yields their revisions.

                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [self.nullid]
                     if heads is None:
                         heads = self.heads()

                     commonrevs = [self.rev(n) for n in common]
                     headrevs = [self.rev(n) for n in heads]

                     inc = self.incrementalmissingrevs(common=commonrevs)
                     missingrevs = inc.missingancestors(headrevs)
                     return [self.node(r) for r in missingrevs]
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a topological path from 'roots' to 'heads'.

                     Return a tuple (nodes, outroots, outheads) where 'nodes' is a
                     topologically sorted list of all nodes N that satisfy both of
                     these constraints:

                       1. N is a descendant of some node in 'roots'
                       2. N is an ancestor of some node in 'heads'

                     Every node is considered to be both a descendant and an ancestor
                     of itself, so every reachable node in 'roots' and 'heads' will be
                     included in 'nodes'.

                     'outroots' is the list of reachable nodes in 'roots', i.e., the
                     subset of 'roots' that is returned in 'nodes'.  Likewise,
                     'outheads' is the subset of 'heads' that is also in 'nodes'.

                     'roots' and 'heads' are both lists of node IDs.  If 'roots' is
                     unspecified, uses nullid as the only root.  If 'heads' is
                     unspecified, uses list of all of the revlog's heads.

                     An explicitly empty 'roots' or 'heads', or a query without any
                     matching node, yields a tuple of three empty lists."""
                     # Result returned whenever nothing can match.
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [self.nullid]  # Everybody's a descendant of nullid
                         lowestrev = nullrev
                     if (lowestrev == nullrev) and (heads is None):
                         # We want _all_ the nodes!
                         return (
                             [self.node(r) for r in self],
                             [self.nullid],
                             list(self.heads()),
                         )
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = len(self) - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = set()
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, False)
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = set(heads)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == self.nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendant of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors.add(n)  # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.update(
                                         [p for p in self.parents(n) if p != self.nullid]
                                     )
                                 elif n in heads:  # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > nullrev:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [root for root in roots if root in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(root) for root in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = nullrev
                             roots = [self.nullid]
                     # Transform our roots list into a set.
                     descendants = set(roots)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendants.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descendants, empty parents will look like
                     # they're descendants.
                     for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
                         n = self.node(r)
                         isdescendant = False
                         if lowestrev == nullrev:  # Everybody is a descendant of nullid
                             isdescendant = True
                         elif n in descendants:
                             # n is already a descendant
                             isdescendant = True
                             # This check only needs to be done here because all the roots
                             # were already marked as descendants before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendants, it's not a root.
                                 if (p[0] in descendants) or (p[1] in descendants):
                                     roots.remove(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendant if either of its parents are
                             # descendants.  (We seeded the descendants set with the roots
                             # up there, remember?)
                             if (p[0] in descendants) or (p[1] in descendants):
                                 descendants.add(n)
                                 isdescendant = True
                         if isdescendant and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendants and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = True
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = True
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     # Keep only the heads flagged as reached (or discovered) above.
                     heads = [head for head, flag in pycompat.iteritems(heads) if flag]
                     roots = list(roots)
                     # NOTE(review): these asserts fail if the loop selected no node;
                     # whether that can happen for valid input is not visible from here.
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def headrevs(self, revs=None):
                     """return the head revisions, optionally restricted to ``revs``

                     Without ``revs`` the index is asked first, with ``self._headrevs()``
                     as a fallback when the index has no ``headrevs`` method. With
                     ``revs`` the computation goes through ``rustdagop`` when the Rust
                     extension is usable, through ``dagop`` otherwise.
                     """
                     if revs is not None:
                         if rustdagop is not None and self.index.rust_ext_compat:
                             return rustdagop.headrevs(self.index, revs)
                         return dagop.headrevs(revs, self._uncheckedparentrevs)
                     try:
                         return self.index.headrevs()
                     except AttributeError:
                         return self._headrevs()
                 def computephases(self, roots):
                     """Delegate phase computation for ``roots`` to the index.
                     Returns whatever ``self.index.computephasesmapsets(roots)`` returns.
                     """
                     phasesets = self.index.computephasesmapsets(roots)
                     return phasesets
                 def _headrevs(self):
                     count = len(self)
                     if not count:
                         return [nullrev]
                     # we won't iter over filtered rev so nobody is a head at start
                     ishead = [0] * (count + 1)
                     index = self.index
                     for r in self:
                         ishead[r] = 1  # I may be an head
                         e = index[r]
                         ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
                     return [r for r, val in enumerate(ishead) if val]
                 def heads(self, start=None, stop=None):
                     """List the nodes that have no children.
                     start: when given, only heads descending from this node are
                     returned.
                     stop: when given, the revisions of these nodes are treated as if
                     they had no children.
                     With neither argument, an empty revlog yields ``[self.nullid]``.
                     """
                     unrestricted = start is None and stop is None
                     if unrestricted:
                         if len(self) == 0:
                             return [self.nullid]
                         return [self.node(headrev) for headrev in self.headrevs()]
                     startrev = nullrev if start is None else self.rev(start)
                     stoprevs = set()
                     for stopnode in stop or []:
                         stoprevs.add(self.rev(stopnode))
                     subset = dagop.headrevssubset(
                         self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
                     )
                     return [self.node(headrev) for headrev in subset]
                 def children(self, node):
                     """Return the nodes of every revision that has ``node`` as a parent.
                     Only revisions after ``node``'s own revision are scanned. A revision
                     whose parents are all nullrev counts as a child only when ``node``
                     itself resolves to nullrev.
                     """
                     target = self.rev(node)
                     found = []
                     for rev in self.revs(start=target + 1):
                         realparents = [
                             prev for prev in self.parentrevs(rev) if prev != nullrev
                         ]
                         if not realparents:
                             # parentless revision: a child of the null revision only
                             if target == nullrev:
                                 found.append(self.node(rev))
                             continue
                         for prev in realparents:
                             if prev == target:
                                 found.append(self.node(rev))
                     return found
                 def commonancestorsheads(self, a, b):
                     """Return the heads of the common ancestors of nodes ``a`` and ``b``.
                     Both nodes are converted to revisions, the revision-level helper
                     is consulted, and its result is mapped back to a list of nodes.
                     """
                     reva = self.rev(a)
                     revb = self.rev(b)
                     headrevs = self._commonancestorsheads(reva, revb)
                     return pycompat.maplist(self.node, headrevs)
                 def _commonancestorsheads(self, *revs):
                     """calculate all the heads of the common ancestors of revs"""
                     try:
                         ancs = self.index.commonancestorsheads(*revs)
                     except (AttributeError, OverflowError):  # C implementation failed
                         ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
                     return ancs
                 def isancestor(self, a, b):
                     """Tell whether node ``a`` is an ancestor of node ``b``.
                     A revision is considered an ancestor of itself. The nodes are
                     converted to revisions and the check is left to ``isancestorrev``.
                     """
                     reva = self.rev(a)
                     revb = self.rev(b)
                     return self.isancestorrev(reva, revb)
                 def isancestorrev(self, a, b):
                     """Tell whether revision ``a`` is an ancestor of revision ``b``.
                     A revision is considered an ancestor of itself, and nullrev is
                     reported as an ancestor of everything. A revision greater than
                     ``b`` is never reported as its ancestor. Every other case is
                     settled by ``reachableroots``.
                     """
                     if a == nullrev or a == b:
                         return True
                     if a > b:
                         return False
                     reached = self.reachableroots(a, [b], [a], includepath=False)
                     return bool(reached)
                 def reachableroots(self, minroot, heads, roots, includepath=False):
                     """Return (heads(::(<roots> and <roots>::<heads>))).
                     If includepath is True, return (<roots>::<heads>).
                     The index's ``reachableroots2`` is preferred; when the index raises
                     AttributeError, ``dagop._reachablerootspure`` is used instead.
                     """
                     try:
                         reached = self.index.reachableroots2(
                             minroot, heads, roots, includepath
                         )
                     except AttributeError:
                         # note the swapped roots/heads order expected by the pure helper
                         reached = dagop._reachablerootspure(
                             self.parentrevs, minroot, roots, heads, includepath
                         )
                     return reached
                 def ancestor(self, a, b):
                     """Return the "best" common ancestor of nodes ``a`` and ``b``.
                     Candidates come from the index, or from the ``ancestor`` module when
                     the index raises AttributeError or OverflowError. With several
                     candidates the smallest node wins, so ties are resolved
                     consistently. With none, ``self.nullid`` is returned.
                     """
                     reva = self.rev(a)
                     revb = self.rev(b)
                     try:
                         candidates = self.index.ancestors(reva, revb)
                     except (AttributeError, OverflowError):
                         candidates = ancestor.ancestors(self.parentrevs, reva, revb)
                     if not candidates:
                         return self.nullid
                     # choose a consistent winner when there's a tie
                     return min(map(self.node, candidates))
                 def _match(self, id):
                     if isinstance(id, int):
                         # rev
                         return self.node(id)
                     if len(id) == self.nodeconstants.nodelen:
                         # possibly a binary node
                         # odds of a binary node being all hex in ASCII are 1 in 10**25
                         try:
                             node = id
                             self.rev(node)  # quick search the index
                             return node
                         except error.LookupError:
                             pass  # may be partial hex id
                     try:
                         # str(rev)
                         rev = int(id)
                         if b"%d" % rev != id:
                             raise ValueError
                         if rev < 0:
                             rev = len(self) + rev
                         if rev < 0 or rev >= len(self):
                             raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         pass
                     if len(id) == 2 * self.nodeconstants.nodelen:
                         try:
                             # a full hex nodeid?
                             node = bin(id)
                             self.rev(node)
                             return node
                         except (TypeError, error.LookupError):
                             pass
                 def _partialmatch(self, id):
                     # we don't care wdirfilenodeids as they should be always full hash
                     maybewdir = self.nodeconstants.wdirhex.startswith(id)
+                    ambiguous = False
                     try:
                         partial = self.index.partialmatch(id)
                         if partial and self.hasnode(partial):
                             if maybewdir:
                                 # single 'ff...' match in radix tree, ambiguous with wdir
-                                raise error.RevlogError
+                                ambiguous = True
+                            else:
                                 return partial
-                        if maybewdir:
+                        elif maybewdir:
                             # no 'ff...' match in radix tree, wdir identified
                             raise error.WdirUnsupported
+                        else:
                             return None
                     except error.RevlogError:
                         # parsers.c radix tree lookup gave multiple matches
                         # fast path: for unfiltered changelog, radix tree is accurate
                         if not getattr(self, 'filteredrevs', None):
-                            raise error.AmbiguousPrefixLookupError(
+                            ambiguous = True
-                                id, self.display_id, _(b'ambiguous identifier')
                         # fall through to slow path that filters hidden revisions
                     except (AttributeError, ValueError):
                         # we are pure python, or key was too short to search radix tree
                         pass
+                    if ambiguous:
+                        raise error.AmbiguousPrefixLookupError(
+                            id, self.display_id, _(b'ambiguous identifier')
+                        )
                     if id in self._pcache:
                         return self._pcache[id]
                     if len(id) <= 40:
                         try:
                             # hex(node)[:...]
                             l = len(id) // 2  # grab an even number of digits
                             prefix = bin(id[: l * 2])
                             nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                             nl = [
                                 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                             ]
                             if self.nodeconstants.nullhex.startswith(id):
                                 nl.append(self.nullid)
                             if len(nl) > 0:
                                 if len(nl) == 1 and not maybewdir:
                                     self._pcache[id] = nl[0]
                                     return nl[0]
                                 raise error.AmbiguousPrefixLookupError(
                                     id, self.display_id, _(b'ambiguous identifier')
                                 )
                             if maybewdir:
                                 raise error.WdirUnsupported
                             return None
                         except TypeError:
                             pass
                 def lookup(self, id):
                     """Locate a node from an identifier.
                     The identifier may be:
                     - a revision number or str(revision number)
                     - a nodeid or a subset of a hex nodeid
                     Raises error.LookupError when nothing matches.
                     """
                     exact = self._match(id)
                     if exact is not None:
                         return exact
                     partial = self._partialmatch(id)
                     if not partial:
                         raise error.LookupError(
                             id, self.display_id, _(b'no match found')
                         )
                     return partial
                 def shortest(self, node, minlength=1):
                     """Find the shortest unambiguous prefix that matches node.
                     The result is a prefix of ``hex(node)`` that is at least
                     ``minlength`` characters long. Raises error.LookupError when the
                     node cannot be found.
                     """
                     def isvalid(prefix):
                         # True when ``prefix`` identifies a single node, False when it
                         # is ambiguous; raises LookupError when it matches nothing.
                         try:
                             matchednode = self._partialmatch(prefix)
                         except error.AmbiguousPrefixLookupError:
                             return False
                         except error.WdirUnsupported:
                             # single 'ff...' match
                             return True
                         if matchednode is None:
                             raise error.LookupError(node, self.display_id, _(b'no node'))
                         return True
                     def maybewdir(prefix):
                         # a prefix made only of 'f' characters
                         return all(c == b'f' for c in pycompat.iterbytestr(prefix))
                     hexnode = hex(node)
                     def disambiguate(hexnode, minlength):
                         """Disambiguate against wdirid."""
                         # grow the prefix until it is no longer made only of 'f's
                         for length in range(minlength, len(hexnode) + 1):
                             prefix = hexnode[:length]
                             if not maybewdir(prefix):
                                 return prefix
                     # Without filtered revisions, ask the index for the shortest length.
                     if not getattr(self, 'filteredrevs', None):
                         try:
                             length = max(self.index.shortest(node), minlength)
                             return disambiguate(hexnode, length)
                         except error.RevlogError:
                             # the index lookup failed; only wdirid may continue below
                             if node != self.nodeconstants.wdirid:
                                 raise error.LookupError(
                                     node, self.display_id, _(b'no node')
                                 )
                         except AttributeError:
                             # Fall through to pure code
                             pass
                     if node == self.nodeconstants.wdirid:
                         # wdirid: return the first valid prefix as is, without
                         # disambiguating against itself
                         for length in range(minlength, len(hexnode) + 1):
                             prefix = hexnode[:length]
                             if isvalid(prefix):
                                 return prefix
                     # Pure path: the first valid prefix, then disambiguated against
                     # wdirid.
                     for length in range(minlength, len(hexnode) + 1):
                         prefix = hexnode[:length]
                         if isvalid(prefix):
                             return disambiguate(hexnode, length)
                 def cmp(self, node, text):
                     """Compare ``text`` with a given file revision.
                     Returns True when ``text`` differs from what is stored, judged by
                     hashing ``text`` with the parents of ``node`` and comparing the
                     result to ``node``.
                     """
                     parents = self.parents(node)
                     p1, p2 = parents
                     computed = storageutil.hashrevisionsha1(text, p1, p2)
                     return computed != node
                 def _cachesegment(self, offset, data):
                     """Add a segment to the revlog cache.
                     Accepts an absolute offset and the data that is at that location.
                     """
                     o, d = self._chunkcache
                     # try to add to existing cache
                     if o + len(d) == offset and len(d) + len(data) < _chunksize:
                         self._chunkcache = o, d + data
                     else:
                         self._chunkcache = offset, data
                 def _readsegment(self, offset, length, df=None):
                     """Load a segment of raw data from the revlog.
                     Accepts an absolute offset, length to read, and an optional existing
                     file handle to read from.
                     If an existing file handle is passed, it will be seeked and the
                     original seek position will NOT be restored.
                     Returns a str or buffer of raw byte data.
                     Raises if the requested number of bytes could not be read.
                     The window that was actually read is stored in the chunk cache.
                     """
                     # Cache data both forward and backward around the requested
                     # data, in a fixed size window. This helps speed up operations
                     # involving reading the revlog backwards.
                     cachesize = self._chunkcachesize
                     # Align the window on cachesize boundaries with bit masks
                     # (assumes cachesize is a power of two -- TODO confirm).
                     realoffset = offset & ~(cachesize - 1)
                     reallength = (
                         (offset + length + cachesize) & ~(cachesize - 1)
                     ) - realoffset
                     with self._datareadfp(df) as df:
                         df.seek(realoffset)
                         d = df.read(reallength)
                     self._cachesegment(realoffset, d)
                     if offset != realoffset or reallength != length:
                         # The window is wider than the request: hand back a view of
                         # the requested part only.
                         startoffset = offset - realoffset
                         if len(d) - startoffset < length:
                             raise error.RevlogError(
                                 _(
                                     b'partial read of revlog %s; expected %d bytes from '
                                     b'offset %d, got %d'
                                 )
                                 % (
                                     self._indexfile if self._inline else self._datafile,
                                     length,
                                     offset,
                                     len(d) - startoffset,
                                 )
                             )
                         return util.buffer(d, startoffset, length)
                     # The window is exactly the request: return the data itself.
                     if len(d) < length:
                         raise error.RevlogError(
                             _(
                                 b'partial read of revlog %s; expected %d bytes from offset '
                                 b'%d, got %d'
                             )
                             % (
                                 self._indexfile if self._inline else self._datafile,
                                 length,
                                 offset,
                                 len(d),
                             )
                         )
                     return d
                 def _getsegment(self, offset, length, df=None):
                     """Obtain a segment of raw data from the revlog.
                     Accepts an absolute offset, length of bytes to obtain, and an
                     optional file handle to the already-opened revlog. If the file
                     handle is used, it's original seek position will not be preserved.
                     Requests for data may be returned from a cache.
                     Returns a str or a buffer instance of raw byte data.
                     """
                     o, d = self._chunkcache
                     l = len(d)
                     # is it in the cache?
                     cachestart = offset - o
                     cacheend = cachestart + length
                     if cachestart >= 0 and cacheend <= l:
                         if cachestart == 0 and cacheend == l:
                             return d  # avoid a copy
                         return util.buffer(d, cachestart, cacheend - cachestart)
                     return self._readsegment(offset, length, df=df)
                 def _getsegmentforrevs(self, startrev, endrev, df=None):
                     """Obtain a segment of raw data corresponding to a range of revisions.
                     Accepts the start and end revisions and an optional already-open
                     file handle to be used for reading. If the file handle is read, its
                     seek position will not be preserved.
                     Requests for data may be satisfied by a cache.
                     Returns a 2-tuple of (offset, data) for the requested range of
                     revisions. Offset is the integer offset from the beginning of the
                     revlog and data is a str or buffer of the raw byte data.
                     Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
                     to determine where each revision's data begins and ends.
                     """
                     # Inlined self.start(startrev) & self.end(endrev) for perf reasons
                     # (functions are expensive).
                     index = self.index
                     istart = index[startrev]
                     start = int(istart[0] >> 16)
                     if startrev == endrev:
                         end = start + istart[1]
                     else:
                         iend = index[endrev]
                         end = int(iend[0] >> 16) + iend[1]
                     if self._inline:
                         start += (startrev + 1) * self.index.entry_size
                         end += (endrev + 1) * self.index.entry_size
                     length = end - start
                     return start, self._getsegment(start, length, df=df)
                 def _chunk(self, rev, df=None):
                     """Obtain a single decompressed chunk for a revision.
                     Accepts an integer revision and an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will not
                     be preserved.
                     Returns a str holding uncompressed data for the requested revision.
                     Raises error.RevlogError when the index entry carries a compression
                     mode that is not one of the known COMP_MODE_* values.
                     """
                     # slot 10 of the index entry holds the compression mode of the data
                     compression_mode = self.index[rev][10]
                     data = self._getsegmentforrevs(rev, rev, df=df)[1]
                     if compression_mode == COMP_MODE_PLAIN:
                         return data
                     elif compression_mode == COMP_MODE_DEFAULT:
                         return self._decompressor(data)
                     elif compression_mode == COMP_MODE_INLINE:
                         return self.decompress(data)
                     else:
                         # bytes, like every other message built in this file
                         msg = b'unknown compression mode %d'
                         msg %= compression_mode
                         raise error.RevlogError(msg)
                 def _chunks(self, revs, df=None, targetsize=None):
                     """Obtain decompressed chunks for the specified revisions.
                     Accepts an iterable of numeric revisions that are assumed to be in
                     ascending order. Also accepts an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will
                     not be preserved.
                     This function is similar to calling ``self._chunk()`` multiple times,
                     but is faster.
                     Returns a list with decompressed data for each requested revision.
                     Raises error.RevlogError when an index entry carries a compression
                     mode that is not one of the known COMP_MODE_* values.
                     """
                     if not revs:
                         return []
                     # bind frequently used attributes to locals for the loops below
                     start = self.start
                     length = self.length
                     inline = self._inline
                     iosize = self.index.entry_size
                     buffer = util.buffer
                     l = []
                     ladd = l.append
                     if not self._withsparseread:
                         slicedchunks = (revs,)
                     else:
                         slicedchunks = deltautil.slicechunk(
                             self, revs, targetsize=targetsize
                         )
                     for revschunk in slicedchunks:
                         firstrev = revschunk[0]
                         # Skip trailing revisions with empty diff
                         for lastrev in revschunk[::-1]:
                             if length(lastrev) != 0:
                                 break
                         try:
                             offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
                         except OverflowError:
                             # issue4215 - we can't cache a run of chunks greater than
                             # 2G on Windows
                             return [self._chunk(rev, df=df) for rev in revschunk]
                         decomp = self.decompress
                         # self._decompressor might be None, but will not be used in that case
                         def_decomp = self._decompressor
                         for rev in revschunk:
                             chunkstart = start(rev)
                             if inline:
                                 # inline revlog: skip the index entries stored before
                                 # this revision's data
                                 chunkstart += (rev + 1) * iosize
                             chunklength = length(rev)
                             comp_mode = self.index[rev][10]
                             # view of this revision's bytes inside the segment
                             c = buffer(data, chunkstart - offset, chunklength)
                             if comp_mode == COMP_MODE_PLAIN:
                                 ladd(c)
                             elif comp_mode == COMP_MODE_INLINE:
                                 ladd(decomp(c))
                             elif comp_mode == COMP_MODE_DEFAULT:
                                 ladd(def_decomp(c))
                             else:
                                 # bytes, like every other message built in this file
                                 msg = b'unknown compression mode %d'
                                 msg %= comp_mode
                                 raise error.RevlogError(msg)
                     return l
                 def _chunkclear(self):
                     """Clear the raw chunk cache."""
                     self._chunkcache = (0, b'')
                 def deltaparent(self, rev):
                     """Return the revision that ``rev`` is stored as a delta against.
                     nullrev is returned when the entry names itself as its base.
                     Otherwise the recorded base is returned for general-delta revlogs,
                     and the previous revision for the others.
                     """
                     entry = self.index[rev]
                     deltabase = entry[3]
                     if deltabase == rev:
                         return nullrev
                     return deltabase if self._generaldelta else rev - 1
                 def issnapshot(self, rev):
                     """Tell whether ``rev`` is a snapshot.
                     Without ``self._sparserevlog`` a revision is a snapshot exactly when
                     its delta parent is nullrev. Otherwise an ``issnapshot`` provided by
                     the index is preferred. Failing that, the answer is derived from the
                     index entries, following the chain of delta bases.
                     """
                     if not self._sparserevlog:
                         return self.deltaparent(rev) == nullrev
                     if util.safehasattr(self.index, b'issnapshot'):
                         # directly assign the method to cache the testing and access
                         self.issnapshot = self.index.issnapshot
                         return self.issnapshot(rev)
                     if rev == nullrev:
                         return True
                     entry = self.index[rev]
                     deltabase = entry[3]
                     if deltabase == rev or deltabase == nullrev:
                         # the entry is based on itself or on the null revision
                         return True
                     if deltabase == entry[5] or deltabase == entry[6]:
                         # a delta against one of the parents is not a snapshot
                         return False
                     # based on some other revision: a snapshot only if that base is one
                     return self.issnapshot(deltabase)
                 def snapshotdepth(self, rev):
                     """Return the number of snapshots in the chain before ``rev``.
                     Raises error.ProgrammingError when ``rev`` is not a snapshot.
                     """
                     if not self.issnapshot(rev):
                         # interpolate the revision so the report names the offender
                         raise error.ProgrammingError(
                             b'revision %d not a snapshot' % rev
                         )
                     # the chain includes ``rev`` itself, hence the -1
                     return len(self._deltachain(rev)[0]) - 1
                 def revdiff(self, rev1, rev2):
                     """Return or calculate a delta between two revisions.
                     The delta is in binary form and is intended to be written to revlog
                     data directly, so raw revision data is used. When ``rev2`` is
                     already stored as a delta against ``rev1`` (and ``rev1`` is not
                     nullrev), the stored chunk is returned as is.
                     """
                     storedasdelta = rev1 != nullrev and self.deltaparent(rev2) == rev1
                     if storedasdelta:
                         return bytes(self._chunk(rev2))
                     return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
                 def _processflags(self, text, flags, operation, raw=False):
                     """Deprecated entry point to access flag processors.
                     Issues a deprecation warning, then forwards to the ``flagutil``
                     processor for raw data, for ``b'read'`` operations, or for writes
                     (any other ``operation``).
                     """
                     util.nouideprecwarn(
                         b'_processflag(...) use the specialized variant',
                         b'5.2',
                         stacklevel=2,
                     )
                     if raw:
                         validatehash = flagutil.processflagsraw(self, text, flags)
                         return text, validatehash
                     if operation == b'read':
                         return flagutil.processflagsread(self, text, flags)
                     # write operation
                     return flagutil.processflagswrite(self, text, flags)
                 def revision(self, nodeorrev, _df=None, raw=False):
                     """Return the uncompressed revision of a node or revision number.
                     _df - an existing file handle to read from. (internal-only)
                     raw - whether the revision data is to be treated as raw data when
                     applying flag transforms. Passing True is deprecated and triggers a
                     deprecation warning pointing at ``rawdata``.
                     """
                     if raw:
                         util.nouideprecwarn(
                             b'revlog.revision(..., raw=True) is deprecated, '
                             b'use revlog.rawdata(...)',
                             b'5.2',
                             stacklevel=2,
                         )
                     result = self._revisiondata(nodeorrev, _df, raw=raw)
                     # first item is the text, the sidedata is not wanted here
                     return result[0]
                 def sidedata(self, nodeorrev, _df=None):
                     """Return a map of extra data related to the changeset.
                     This data is not part of the hash. A dictionary is returned for
                     now, but a more advanced mapping object will likely be used in the
                     future for more efficient/lazy code.
                     """
                     result = self._revisiondata(nodeorrev, _df)
                     # second item is the sidedata, the text is not wanted here
                     return result[1]
                 def _revisiondata(self, nodeorrev, _df=None, raw=False):
                     """Return ``(text, sidedata)`` for a node or a revision number.
                     nodeorrev: an int is taken as a revision number, anything else as
                     a node.
                     _df: optional file handle, passed on to ``_rawtext``.
                     raw: when True the stored text is returned without running the
                     read flag processors.
                     The text is hash-checked when the flag processing asks for it, and
                     the revision cache is refreshed when the raw text did not come
                     from it.
                     """
                     # deal with <nodeorrev> argument type
                     if isinstance(nodeorrev, int):
                         rev = nodeorrev
                         node = self.node(rev)
                     else:
                         node = nodeorrev
                         rev = None
                     # fast path the special `nullid` rev
                     if node == self.nullid:
                         return b"", {}
                     # ``rawtext`` is the text as stored inside the revlog. Might be the
                     # revision or might need to be processed to retrieve the revision.
                     rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
                     # ``rev`` can still be None here (cache hit for a node argument),
                     # hence the lazy lookups below.
                     if self.hassidedata:
                         if rev is None:
                             rev = self.rev(node)
                         sidedata = self._sidedata(rev)
                     else:
                         sidedata = {}
                     if raw and validated:
                         # if we don't want to process the raw text and that raw
                         # text is cached, we can exit early.
                         return rawtext, sidedata
                     if rev is None:
                         rev = self.rev(node)
                     # the revlog's flag for this revision
                     # (usually alter its state or content)
                     flags = self.flags(rev)
                     if validated and flags == REVIDX_DEFAULT_FLAGS:
                         # no extra flags set, no flag processor runs, text = rawtext
                         return rawtext, sidedata
                     if raw:
                         validatehash = flagutil.processflagsraw(self, rawtext, flags)
                         text = rawtext
                     else:
                         r = flagutil.processflagsread(self, rawtext, flags)
                         text, validatehash = r
                     if validatehash:
                         self.checkhash(text, node, rev=rev)
                     if not validated:
                         # the raw text was rebuilt rather than read from the cache:
                         # remember it for later calls
                         self._revisioncache = (node, rev, rawtext)
                     return text, sidedata
                 def _rawtext(self, node, rev, _df=None):
                     """return the possibly unvalidated rawtext for a revision
                     returns (rev, rawtext, validated)
                     ``validated`` is True only when the text comes straight from the
                     revision cache; in that case ``rev`` is returned as it was passed
                     in, which may be None.
                     """
                     # revision in the cache (could be useful to apply delta)
                     cachedrev = None
                     # An intermediate text to apply deltas to
                     basetext = None
                     # Check if we have the entry in cache
                     # The cache entry looks like (node, rev, rawtext)
                     if self._revisioncache:
                         if self._revisioncache[0] == node:
                             return (rev, self._revisioncache[2], True)
                         cachedrev = self._revisioncache[1]
                     if rev is None:
                         rev = self.rev(node)
                     # ``stopped`` tells that the chain walk ended at ``cachedrev``, so
                     # the cached text can serve as the base for the deltas.
                     chain, stopped = self._deltachain(rev, stoprev=cachedrev)
                     if stopped:
                         basetext = self._revisioncache[2]
                     # drop cache to save memory, the caller is expected to
                     # update self._revisioncache after validating the text
                     self._revisioncache = None
                     targetsize = None
                     # slot 2 of the index entry; a negative value leaves targetsize
                     # unset
                     rawsize = self.index[rev][2]
                     if 0 <= rawsize:
                         targetsize = 4 * rawsize
                     bins = self._chunks(chain, df=_df, targetsize=targetsize)
                     if basetext is None:
                         # no cached base: the first chunk of the chain is the base text
                         basetext = bytes(bins[0])
                         bins = bins[1:]
                     rawtext = mdiff.patches(basetext, bins)
                     del basetext  # let us have a chance to free memory early
                     return (rev, rawtext, False)
                 def _sidedata(self, rev):
                     """Return the sidedata for a given revision number."""
                     index_entry = self.index[rev]
                     sidedata_offset = index_entry[8]
                     sidedata_size = index_entry[9]
                     if self._inline:
                         sidedata_offset += self.index.entry_size * (1 + rev)
                     if sidedata_size == 0:
                         return {}
                     comp_segment = self._getsegment(sidedata_offset, sidedata_size)
                     comp = self.index[rev][11]
                     if comp == COMP_MODE_PLAIN:
                         segment = comp_segment
                     elif comp == COMP_MODE_DEFAULT:
                         segment = self._decompressor(comp_segment)
                     elif comp == COMP_MODE_INLINE:
                         segment = self.decompress(comp_segment)
                     else:
                         msg = 'unknown compression mode %d'
                         msg %= comp
                         raise error.RevlogError(msg)
                     sidedata = sidedatautil.deserialize_sidedata(segment)
                     return sidedata
                 def rawdata(self, nodeorrev, _df=None):
                     """Return the uncompressed raw data of a node or revision number.

                     nodeorrev - node id or revision number of the wanted revision
                     _df - an existing file handle to read from. (internal-only)
                     """
                     result = self._revisiondata(nodeorrev, _df, raw=True)
                     # the first item is the text; the rest is not wanted here
                     return result[0]
                 def hash(self, text, p1, p2):
                     """Return the node id of ``text`` with parents ``p1`` and ``p2``.

                     This is a method rather than a plain function call so that
                     subclasses can substitute another hashing scheme.
                     """
                     node = storageutil.hashrevisionsha1(text, p1, p2)
                     return node
                 def checkhash(self, text, node, p1=None, p2=None, rev=None):
                     """Verify that ``text`` hashes to ``node``.

                     When both parents are omitted they are looked up from ``node``.
                     ``rev`` is only used to label the error message; without it a
                     shortened hex form of the node is shown instead.

                     Raises error.RevlogError on mismatch, or error.CensoredNodeError
                     when the revlog is censorable and the text is a censored one.

                     Available as a method so that subclasses can extend hash mismatch
                     behaviors as needed.
                     """
                     try:
                         if p1 is None and p2 is None:
                             p1, p2 = self.parents(node)
                         if node == self.hash(text, p1, p2):
                             return

                         # Clear the revision cache on hash failure. The cache only
                         # stores the raw revision, so dropping it merely costs a
                         # cache miss the next time the raw data is read. That is
                         # rare, and cheaper than teaching the cache about the hash
                         # verification state.
                         cache = self._revisioncache
                         if cache and cache[0] == node:
                             self._revisioncache = None

                         if rev is not None:
                             revornode = rev
                         else:
                             revornode = templatefilters.short(hex(node))
                         raise error.RevlogError(
                             _(b"integrity check failed on %s:%s")
                             % (self.display_id, pycompat.bytestr(revornode))
                         )
                     except error.RevlogError:
                         # a censored text is reported with its own exception type
                         if self._censorable and storageutil.iscensoredtext(text):
                             raise error.CensoredNodeError(self.display_id, node, text)
                         raise
                 def _enforceinlinesize(self, tr):
                     """Check if the revlog is too big for inline and convert if so.
                     This should be called after revisions are added to the revlog. If the
                     revlog has grown too large to be an inline revlog, it will convert it
                     to use multiple index and data files.
                     """
                     tiprev = len(self) - 1
                     total_size = self.start(tiprev) + self.length(tiprev)
                     if not self._inline or total_size < _maxinline:
                         return
                     troffset = tr.findoffset(self._indexfile)
                     if troffset is None:
                         raise error.RevlogError(
                             _(b"%s not found in the transaction") % self._indexfile
                         )
                     trindex = 0
                     tr.add(self._datafile, 0)
                     existing_handles = False
                     if self._writinghandles is not None:
                         existing_handles = True
                         fp = self._writinghandles[0]
                         fp.flush()
                         fp.close()
                         # We can't use the cached file handle after close(). So prevent
                         # its usage.
                         self._writinghandles = None
                     new_dfh = self._datafp(b'w+')
                     new_dfh.truncate(0)  # drop any potentially existing data
                     try:
                         with self._indexfp() as read_ifh:
                             for r in self:
                                 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                                 if troffset <= self.start(r) + r * self.index.entry_size:
                                     trindex = r
                             new_dfh.flush()
                         with self.__index_new_fp() as fp:
                             self._format_flags &= ~FLAG_INLINE_DATA
                             self._inline = False
                             for i in self:
                                 e = self.index.entry_binary(i)
                                 if i == 0 and self._docket is None:
                                     header = self._format_flags | self._format_version
                                     header = self.index.pack_header(header)
                                     e = header + e
                                 fp.write(e)
                             if self._docket is not None:
                                 self._docket.index_end = fp.tell()
                             # There is a small transactional race here. If the rename of
                             # the index fails, we should remove the datafile. It is more
                             # important to ensure that the data file is not truncated
                             # when the index is replaced as otherwise data is lost.
                             tr.replace(self._datafile, self.start(trindex))
                             # the temp file replace the real index when we exit the context
                             # manager
                         tr.replace(self._indexfile, trindex * self.index.entry_size)
                         nodemaputil.setup_persistent_nodemap(tr, self)
                         self._chunkclear()
                         if existing_handles:
                             # switched from inline to conventional reopen the index
                             ifh = self.__index_write_fp()
                             self._writinghandles = (ifh, new_dfh)
                             new_dfh = None
                     finally:
                         if new_dfh is not None:
                             new_dfh.close()
                 def _nodeduplicatecallback(self, transaction, node):
                     """called when trying to add a node already stored."""
                 @contextlib.contextmanager
                 def _writing(self, transaction):
                     if self._trypending:
                         msg = b'try to write in a `trypending` revlog: %s'
                         msg %= self.display_id
                         raise error.ProgrammingError(msg)
                     if self._writinghandles is not None:
                         yield
                     else:
                         r = len(self)
                         dsize = 0
                         if r:
                             dsize = self.end(r - 1)
                         dfh = None
                         if not self._inline:
                             try:
                                 dfh = self._datafp(b"r+")
                                 if self._docket is None:
                                     dfh.seek(0, os.SEEK_END)
                                 else:
                                     dfh.seek(self._docket.data_end, os.SEEK_SET)
                             except IOError as inst:
                                 if inst.errno != errno.ENOENT:
                                     raise
                                 dfh = self._datafp(b"w+")
                             transaction.add(self._datafile, dsize)
                         try:
                             isize = r * self.index.entry_size
                             ifh = self.__index_write_fp()
                             if self._inline:
                                 transaction.add(self._indexfile, dsize + isize)
                             else:
                                 transaction.add(self._indexfile, isize)
                             try:
                                 self._writinghandles = (ifh, dfh)
                                 try:
                                     yield
                                     if self._docket is not None:
                                         self._write_docket(transaction)
                                 finally:
                                     self._writinghandles = None
                             finally:
                                 ifh.close()
                         finally:
                             if dfh is not None:
                                 dfh.close()
                 def _write_docket(self, transaction):
                     """write the current docket on disk
                     Exist as a method to help changelog to implement transaction logic
                     We could also imagine using the same transaction logic for all revlog
                     since docket are cheap."""
                     self._docket.write(transaction)
                 def addrevision(
                     self,
                     text,
                     transaction,
                     link,
                     p1,
                     p2,
                     cachedelta=None,
                     node=None,
                     flags=REVIDX_DEFAULT_FLAGS,
                     deltacomputer=None,
                     sidedata=None,
                 ):
                     """Add a revision to the log and return its revision number.

                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     cachedelta - an optional precomputed delta; it is ignored when the
                         flag processors change the text
                     node - nodeid of revision; typically node is not specified, and it is
                         computed by default as hash(text, p1, p2), however subclasses might
                         use different hashing method (and override checkhash() in such case)
                     flags - the known flags to set on the revision
                     deltacomputer - an optional deltacomputer instance shared between
                         multiple calls
                     sidedata - an optional mapping of sidedata to store with the
                         revision; only accepted by revlogs that support sidedata

                     If the node is already stored, its existing revision number is
                     returned and nothing is written.
                     """
                     if link == nullrev:
                         msg = _(b"attempted to add linkrev -1 to %s") % self.display_id
                         raise error.RevlogError(msg)

                     if sidedata is None:
                         sidedata = {}
                     elif sidedata and not self.hassidedata:
                         msg = _(b"trying to add sidedata to a revlog who don't support them")
                         raise error.ProgrammingError(msg)

                     if flags:
                         # with flags set, the node is derived from the non-raw text
                         node = node or self.hash(text, p1, p2)

                     rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

                     # If the flag processor modifies the revision data, ignore any provided
                     # cachedelta.
                     if rawtext != text:
                         cachedelta = None

                     rawsize = len(rawtext)
                     if rawsize > _maxentrysize:
                         msg = _(b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                         raise error.RevlogError(msg % (self.display_id, rawsize))

                     node = node or self.hash(rawtext, p1, p2)
                     known = self.index.get_rev(node)
                     if known is not None:
                         return known

                     if validatehash:
                         self.checkhash(rawtext, node, p1=p1, p2=p2)

                     return self.addrawrevision(
                         rawtext,
                         transaction,
                         link,
                         p1,
                         p2,
                         node,
                         flags,
                         cachedelta=cachedelta,
                         deltacomputer=deltacomputer,
                         sidedata=sidedata,
                     )
                 def addrawrevision(
                     self,
                     rawtext,
                     transaction,
                     link,
                     p1,
                     p2,
                     node,
                     flags,
                     cachedelta=None,
                     deltacomputer=None,
                     sidedata=None,
                 ):
                     """Add a raw revision whose flags, node and parents are known.

                     Useful when reusing a revision not stored in this revlog (ex:
                     received over wire, or read from an external bundle).

                     The revision is added inside the revlog's writing context and the
                     value returned by ``_addrevision`` is passed back to the caller.
                     """
                     with self._writing(transaction):
                         newrev = self._addrevision(
                             node,
                             rawtext,
                             transaction,
                             link,
                             p1,
                             p2,
                             flags,
                             cachedelta,
                             deltacomputer=deltacomputer,
                             sidedata=sidedata,
                         )
                         return newrev
                 def compress(self, data):
                     """Generate a possibly-compressed representation of data.

                     Returns a ``(header, payload)`` pair. The header is ``b'u'`` when
                     the data is stored uncompressed and does not start with a NUL byte;
                     in every other case it is empty.
                     """
                     if data:
                         packed = self._compressor.compress(data)
                         if packed:
                             # The revlog compressor added the header in the returned
                             # data.
                             return b'', packed
                         if data[0:1] != b'\0':
                             return b'u', data
                     # empty data, or uncompressed data starting with a NUL byte
                     return b'', data
                 def decompress(self, data):
                     """Decompress a revlog chunk.

                     The chunk is expected to begin with a header identifying the
                     format type so it can be routed to an appropriate decompressor.
                     Empty data is returned as is.

                     Raises error.RevlogError when a zlib chunk cannot be decompressed.
                     """
                     if not data:
                         return data

                     # Revlogs are read much more frequently than they are written and
                     # many chunks only take microseconds to decompress, so performance
                     # is important here.
                     #
                     # A few assumptions can be made about revlogs:
                     #
                     # 1) the majority of chunks will be compressed (as opposed to
                     #    inline raw data).
                     # 2) decompressing *any* data will likely be at least 10x slower
                     #    than returning raw inline data.
                     # 3) common and officially supported compression engines should be
                     #    prioritized.
                     #
                     # Hence the order of the checks below: the well-known headers are
                     # tested inline first, and the compression engine lookup, which
                     # serves less common or non-official engines, comes last.
                     #
                     # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
                     # compressed chunks. And this matters for changelog and manifest
                     # reads.
                     header = data[0:1]

                     if header == b'x':
                         try:
                             return _zlibdecompress(data)
                         except zlib.error as e:
                             raise error.RevlogError(
                                 _(b'revlog decompress error: %s')
                                 % stringutil.forcebytestr(e)
                             )

                     # '\0' is more common than 'u' so it goes first.
                     if header == b'\0':
                         return data

                     if header == b'u':
                         # skip the one-byte marker without copying the payload
                         return util.buffer(data, 1)

                     return self._get_decompressor(header).decompress(data)
                 def _addrevision(
                     self,
                     node,
                     rawtext,
                     transaction,
                     link,
                     p1,
                     p2,
                     flags,
                     cachedelta,
                     alwayscache=False,
                     deltacomputer=None,
                     sidedata=None,
                 ):
                     """internal function to add revisions to the log

                     see addrevision for argument descriptions.

                     note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

                     alwayscache - when true and no rawtext was given, the text is built
                         through the delta computer so that it can be put in the
                         revision cache
                     deltacomputer - if not provided or None, a default
                         deltautil.deltacomputer will be used.

                     Must be called inside the `revlog._writing` context, otherwise
                     error.ProgrammingError is raised. error.RevlogError is raised when
                     ``node`` is the null id or one of the working directory ids.

                     Returns the revision number given to the new entry.

                     invariants:
                     - rawtext is optional (can be None); if not set, cachedelta must be set.
                       if both are set, they must correspond to each other.
                     """
                     if node == self.nullid:
                         raise error.RevlogError(
                             _(b"%s: attempt to add null revision") % self.display_id
                         )
                     if (
                         node == self.nodeconstants.wdirid
                         or node in self.nodeconstants.wdirfilenodeids
                     ):
                         raise error.RevlogError(
                             _(b"%s: attempt to add wdir revision") % self.display_id
                         )
                     if self._writinghandles is None:
                         msg = b'adding revision outside `revlog._writing` context'
                         raise error.ProgrammingError(msg)
                     # handle on the file holding revision data: the index file for an
                     # inline revlog, the data file otherwise
                     if self._inline:
                         fh = self._writinghandles[0]
                     else:
                         fh = self._writinghandles[1]
                     # one-item list wrapping the text; it is handed to _revisioninfo
                     # and read back after the write (presumably so the delta code can
                     # fill it in -- confirm against deltautil)
                     btext = [rawtext]
                     # revision number of the entry being added, and of the current tip
                     curr = len(self)
                     prev = curr - 1
                     offset = self._get_data_offset(prev)
                     if self._concurrencychecker:
                         ifh, dfh = self._writinghandles
                         if self._inline:
                             # offset is "as if" it were in the .d file, so we need to add on
                             # the size of the entry metadata.
                             self._concurrencychecker(
                                 ifh, self._indexfile, offset + curr * self.index.entry_size
                             )
                         else:
                             # Entries in the .i are a consistent size.
                             self._concurrencychecker(
                                 ifh, self._indexfile, curr * self.index.entry_size
                             )
                             self._concurrencychecker(dfh, self._datafile, offset)
                     p1r, p2r = self.rev(p1), self.rev(p2)
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if rawtext is None:
                         # need rawtext size, before changed by flag processors, which is
                         # the non-raw size. use revlog explicitly to avoid filelog's extra
                         # logic that might remove metadata size.
                         textlen = mdiff.patchedsize(
                             revlog.size(self, cachedelta[0]), cachedelta[1]
                         )
                     else:
                         textlen = len(rawtext)
                     if deltacomputer is None:
                         deltacomputer = deltautil.deltacomputer(self)
                     revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
                     deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
                     # Compression mode recorded in the index entry. Without a docket
                     # it always stays COMP_MODE_INLINE.
                     compression_mode = COMP_MODE_INLINE
                     if self._docket is not None:
                         # deltainfo.data is a (header, payload) pair
                         h, d = deltainfo.data
                         if not h and not d:
                             # no data to store at all... declare them uncompressed
                             compression_mode = COMP_MODE_PLAIN
                         elif not h:
                             # no separate header: look at the first byte of the payload
                             t = d[0:1]
                             if t == b'\0':
                                 compression_mode = COMP_MODE_PLAIN
                             elif t == self._docket.default_compression_header:
                                 compression_mode = COMP_MODE_DEFAULT
                         elif h == b'u':
                             # we have a more efficient way to declare uncompressed
                             h = b''
                             compression_mode = COMP_MODE_PLAIN
                             deltainfo = deltautil.drop_u_compression(deltainfo)
                     sidedata_compression_mode = COMP_MODE_INLINE
                     if sidedata and self.hassidedata:
                         # start from the plain serialization; the compressed form is
                         # only kept below when it is actually smaller
                         sidedata_compression_mode = COMP_MODE_PLAIN
                         serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
                         # sidedata is stored right after the revision's own data
                         sidedata_offset = offset + deltainfo.deltalen
                         h, comp_sidedata = self.compress(serialized_sidedata)
                         if (
                             h != b'u'
                             and comp_sidedata[0:1] != b'\0'
                             and len(comp_sidedata) < len(serialized_sidedata)
                         ):
                             assert not h
                             if (
                                 comp_sidedata[0:1]
                                 == self._docket.default_compression_header
                             ):
                                 sidedata_compression_mode = COMP_MODE_DEFAULT
                                 serialized_sidedata = comp_sidedata
                             else:
                                 sidedata_compression_mode = COMP_MODE_INLINE
                                 serialized_sidedata = comp_sidedata
                     else:
                         serialized_sidedata = b""
                         # Don't store the offset if the sidedata is empty, that way
                         # we can easily detect empty sidedata and they will be no different
                         # than ones we manually add.
                         sidedata_offset = 0
                     # New index entry, in order: data offset combined with the flags,
                     # stored length, raw text length, delta base, linkrev, parent
                     # revisions, node, sidedata offset, sidedata length, data
                     # compression mode and sidedata compression mode.
                     e = (
                         offset_type(offset, flags),
                         deltainfo.deltalen,
                         textlen,
                         deltainfo.base,
                         link,
                         p1r,
                         p2r,
                         node,
                         sidedata_offset,
                         len(serialized_sidedata),
                         compression_mode,
                         sidedata_compression_mode,
                     )
                     self.index.append(e)
                     entry = self.index.entry_binary(curr)
                     if curr == 0 and self._docket is None:
                         # without a docket, the very first entry is written with the
                         # format header in front of it
                         header = self._format_flags | self._format_version
                         header = self.index.pack_header(header)
                         entry = header + entry
                     self._writeentry(
                         transaction,
                         entry,
                         deltainfo.data,
                         link,
                         offset,
                         serialized_sidedata,
                     )
                     # pick the text back from the shared one-item list
                     rawtext = btext[0]
                     if alwayscache and rawtext is None:
                         rawtext = deltacomputer.buildtext(revinfo, fh)
                     if type(rawtext) == bytes:  # only accept immutable objects
                         self._revisioncache = (node, curr, rawtext)
                     self._chainbasecache[curr] = deltainfo.chainbase
                     return curr
                 def _get_data_offset(self, prev):
                     """Returns the current offset in the (in-transaction) data file.
                     Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
                     file to store that information: since sidedata can be rewritten to the
                     end of the data file within a transaction, you can have cases where, for
                     example, rev `n` does not have sidedata while rev `n - 1` does, leading
                     to `n - 1`'s sidedata being written after `n`'s data.
                     TODO cache this in a docket file before getting out of experimental."""
                     if self._docket is None:
                         return self.end(prev)
                     else:
                         return self._docket.data_end
                 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
                     """Write one index entry and its revision data to the open handles.

                     transaction - the transaction the written files are registered with
                     entry - the binary index entry to write
                     data - a pair of byte strings, written one after the other
                     link - not used by this method
                     offset - offset of the new revision data; for an inline revlog the
                         size of the index entries before it is added on top
                     sidedata - serialized sidedata, written after the data if not empty

                     Raises error.ProgrammingError when called outside the
                     `revlog._writing` context.
                     """
                     # Files opened in a+ mode have inconsistent behavior on various
                     # platforms. Windows requires that a file positioning call be made
                     # when the file handle transitions between reads and writes. See
                     # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
                     # platforms, Python or the platform itself can be buggy. Some versions
                     # of Solaris have been observed to not append at the end of the file
                     # if the file was seeked to before the end. See issue4943 for more.
                     #
                     # We work around this issue by inserting a seek() before writing.
                     # Note: This is likely not necessary on Python 3. However, because
                     # the file handle is reused for reads and may be seeked there, we need
                     # to be careful before changing this.
                     if self._writinghandles is None:
                         msg = b'adding revision outside `revlog._writing` context'
                         raise error.ProgrammingError(msg)
                     ifh, dfh = self._writinghandles
                     # position both handles where the next write must land: the end
                     # of the file, or the end recorded in the docket when there is one
                     if self._docket is None:
                         ifh.seek(0, os.SEEK_END)
                     else:
                         ifh.seek(self._docket.index_end, os.SEEK_SET)
                     if dfh:
                         if self._docket is None:
                             dfh.seek(0, os.SEEK_END)
                         else:
                             dfh.seek(self._docket.data_end, os.SEEK_SET)
                     # revision being written; presumably the caller has already
                     # appended it to the in-memory index -- confirm against callers
                     curr = len(self) - 1
                     if not self._inline:
                         # separate files: data goes to the data file, the entry to the
                         # index file
                         transaction.add(self._datafile, offset)
                         transaction.add(self._indexfile, curr * len(entry))
                         if data[0]:
                             dfh.write(data[0])
                         dfh.write(data[1])
                         if sidedata:
                             dfh.write(sidedata)
                         ifh.write(entry)
                     else:
                         # inline: the entry and its data are written back to back in
                         # the index file, so account for the entries already there
                         offset += curr * self.index.entry_size
                         transaction.add(self._indexfile, offset)
                         ifh.write(entry)
                         ifh.write(data[0])
                         ifh.write(data[1])
                         if sidedata:
                             ifh.write(sidedata)
                         # may split the revlog into separate index and data files
                         self._enforceinlinesize(transaction)
                     if self._docket is not None:
                         # record the new ends of both files in the docket
                         # NOTE(review): the handles are re-read from self, presumably
                         # because _enforceinlinesize() can replace them -- confirm
                         self._docket.index_end = self._writinghandles[0].tell()
                         self._docket.data_end = self._writinghandles[1].tell()
                     nodemaputil.setup_persistent_nodemap(transaction, self)
                 def addgroup(
                     self,
                     deltas,
                     linkmapper,
                     transaction,
                     alwayscache=False,
                     addrevisioncb=None,
                     duplicaterevisioncb=None,
                 ):
                     """Add a group of deltas to this revlog.

                     Each item of ``deltas`` is an 8-tuple ``(node, p1, p2, linknode,
                     deltabase, delta, flags, sidedata)``. The first delta is against
                     its parent, which should already be in our log; the rest are
                     against the previous delta.

                     ``linkmapper`` is called with each ``linknode`` to obtain the link
                     revision to store.

                     ``addrevisioncb``, when set, is called as ``cb(self, rev)`` for
                     every revision that gets stored. ``duplicaterevisioncb``, when
                     set, is called the same way for every node that was already
                     present in the index.

                     Returns True if at least one item (new or duplicate) was
                     processed, False otherwise.

                     Raises ``error.ProgrammingError`` on nested calls,
                     ``error.LookupError`` for an unknown parent or delta base, and
                     ``error.CensoredBaseError`` when a delta against a censored base
                     is not a full replacement.
                     """
                     if self._adding_group:
                         raise error.ProgrammingError(b'cannot nest addgroup() calls')

                     self._adding_group = True
                     processed_any = False
                     try:
                         with self._writing(transaction):
                             deltacomputer = deltautil.deltacomputer(self)
                             for (
                                 node,
                                 p1,
                                 p2,
                                 linknode,
                                 deltabase,
                                 delta,
                                 flags,
                                 sidedata,
                             ) in deltas:
                                 link = linkmapper(linknode)
                                 flags = flags or REVIDX_DEFAULT_FLAGS

                                 known_rev = self.index.get_rev(node)
                                 if known_rev is not None:
                                     # the node is already stored: this can happen if
                                     # two branches make the same change
                                     self._nodeduplicatecallback(transaction, known_rev)
                                     if duplicaterevisioncb:
                                         duplicaterevisioncb(self, known_rev)
                                     processed_any = True
                                     continue

                                 for parent in (p1, p2):
                                     if self.index.has_node(parent):
                                         continue
                                     raise error.LookupError(
                                         parent, self.radix, _(b'unknown parent')
                                     )

                                 if not self.index.has_node(deltabase):
                                     raise error.LookupError(
                                         deltabase, self.display_id, _(b'unknown delta base')
                                     )

                                 baserev = self.rev(deltabase)

                                 if baserev != nullrev and self.iscensored(baserev):
                                     # a delta on top of a censored base must be a full
                                     # replacement expressed as a single patch operation
                                     header_size = struct.calcsize(b">lll")
                                     expected_header = mdiff.replacediffheader(
                                         self.rawsize(baserev), len(delta) - header_size
                                     )
                                     if delta[:header_size] != expected_header:
                                         raise error.CensoredBaseError(
                                             self.display_id, self.node(baserev)
                                         )

                                 if not flags and self._peek_iscensored(baserev, delta):
                                     flags |= REVIDX_ISCENSORED

                                 # Consumers of addrevisioncb are assumed to want to read
                                 # back the revision they were just told about, which
                                 # means a call to revision(). revision() takes a fast
                                 # path on a cache hit, hence the ``alwayscache`` knob
                                 # forwarded to _addrevision().
                                 # addgroup() is only used in the context of changegroup
                                 # generation, so the revision data can always be handled
                                 # as raw by the flagprocessor.
                                 new_rev = self._addrevision(
                                     node,
                                     None,
                                     transaction,
                                     link,
                                     p1,
                                     p2,
                                     flags,
                                     (baserev, delta),
                                     alwayscache=alwayscache,
                                     deltacomputer=deltacomputer,
                                     sidedata=sidedata,
                                 )

                                 if addrevisioncb:
                                     addrevisioncb(self, new_rev)
                                 processed_any = True
                     finally:
                         # always release the re-entrancy guard, even on failure
                         self._adding_group = False
                     return processed_any
                 def iscensored(self, rev):
                     """Tell whether revision ``rev`` carries the censored flag.

                     Revlogs that are not censorable always answer False. Otherwise the
                     result is the (truthy or zero) value of the revision flags masked
                     with ``REVIDX_ISCENSORED``.
                     """
                     if self._censorable:
                         return self.flags(rev) & REVIDX_ISCENSORED
                     return False
                 def _peek_iscensored(self, baserev, delta):
                     """Quickly check if a delta produces a censored revision."""
                     if not self._censorable:
                         return False
                     return storageutil.deltaiscensored(delta, baserev, self.rawsize)
                 def getstrippoint(self, minlink):
                     """Find the minimum rev that must be stripped to strip ``minlink``.

                     Returns a tuple containing the minimum rev and a set of all revs
                     that have linkrevs that will be broken by this strip.
                     """
                     tiprev = len(self) - 1
                     heads = self.headrevs()
                     return storageutil.resolvestripinfo(
                         minlink, tiprev, heads, self.linkrev, self.parentrevs
                     )
                 def strip(self, minlink, transaction):
                     """Truncate the revlog on the first revision with a linkrev >= minlink.

                     This is called when revision ``minlink`` and its descendants are
                     being stripped from the repository. Every revision whose linkrev
                     is >= minlink has to go, because the matching changelog revisions
                     will be renumbered after the strip.

                     The revlog is truncated on the first such revision; the caller is
                     trusted to have saved the revisions that must survive and to
                     re-add them after the truncation.
                     """
                     if len(self) == 0:
                         return

                     # the second item (revs with broken linkrevs) is not needed here
                     striprev, _brokenrevs = self.getstrippoint(minlink)
                     if striprev == len(self):
                         return

                     # first truncate the files on disk
                     data_end = self.start(striprev)
                     index_bytes = striprev * self.index.entry_size
                     if self._inline:
                         # inline revlogs interleave data with index entries
                         end = data_end + index_bytes
                     else:
                         transaction.add(self._datafile, data_end)
                         end = index_bytes
                     transaction.add(self._indexfile, end)

                     docket = self._docket
                     if docket is not None:
                         # XXX we could leverage the docket while stripping. However
                         # it is not powerful enough at the time of this comment.
                         docket.index_end = end
                         docket.data_end = data_end
                         docket.write(transaction, stripping=True)

                     # then reset internal state in memory to forget those revisions
                     self._revisioncache = None
                     self._chaininfocache = util.lrucachedict(500)
                     self._chunkclear()

                     del self.index[striprev:-1]
                 def checksize(self):
                     """Check size of index and data files.

                     Returns a ``(dd, di)`` tuple:

                     - dd: extra bytes for the "data" file
                     - di: extra bytes for the "index" file

                     A healthy revlog will return (0, 0). A missing file (``ENOENT``)
                     counts as zero extra bytes; any other ``IOError`` is re-raised.
                     """
                     expected = 0
                     if len(self):
                         expected = max(0, self.end(len(self) - 1))

                     try:
                         with self._datafp() as f:
                             f.seek(0, io.SEEK_END)
                             actual = f.tell()
                         dd = actual - expected
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0

                     try:
                         # use a context manager so the handle is released even when
                         # seek()/tell() fails
                         with self.opener(self._indexfile) as f:
                             f.seek(0, io.SEEK_END)
                             actual = f.tell()
                         s = self.index.entry_size
                         i = max(0, actual // s)
                         # bytes left over after the last complete index entry
                         di = actual - (i * s)
                         if self._inline:
                             # data lives in the index file: everything that is neither
                             # an index entry nor revision data is extra index bytes
                             databytes = sum(max(0, self.length(r)) for r in self)
                             dd = 0
                             di = actual - len(self) * s - databytes
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0

                     return (dd, di)
                 def files(self):
                     """List the files backing this revlog.

                     The index file always comes first; the data file follows unless
                     the revlog is inline.
                     """
                     if self._inline:
                         return [self._indexfile]
                     return [self._indexfile, self._datafile]
                 def emitrevisions(
                     self,
                     nodes,
                     nodesorder=None,
                     revisiondata=False,
                     assumehaveparentrevisions=False,
                     deltamode=repository.CG_DELTAMODE_STD,
                     sidedata_helpers=None,
                 ):
                     """Emit revision deltas for ``nodes`` via ``storageutil.emitrevisions``.

                     ``nodesorder`` must be one of ``b'nodes'``, ``b'storage'``,
                     ``b'linear'`` or None, otherwise ``error.ProgrammingError`` is
                     raised. When it is None and the revlog does not use generaldelta,
                     ``b'storage'`` is used.

                     When this revlog does not store delta chains, any ``deltamode``
                     other than ``CG_DELTAMODE_PREV`` is replaced by
                     ``CG_DELTAMODE_FULL``.
                     """
                     accepted_orders = (b'nodes', b'storage', b'linear', None)
                     if nodesorder not in accepted_orders:
                         raise error.ProgrammingError(
                             b'unhandled value for nodesorder: %s' % nodesorder
                         )

                     if nodesorder is None and not self._generaldelta:
                         nodesorder = b'storage'

                     if not (
                         self._storedeltachains
                         or deltamode == repository.CG_DELTAMODE_PREV
                     ):
                         deltamode = repository.CG_DELTAMODE_FULL

                     return storageutil.emitrevisions(
                         self,
                         nodes,
                         nodesorder,
                         revlogrevisiondelta,
                         deltaparentfn=self.deltaparent,
                         candeltafn=self.candelta,
                         rawsizefn=self.rawsize,
                         revdifffn=self.revdiff,
                         flagsfn=self.flags,
                         deltamode=deltamode,
                         revisiondata=revisiondata,
                         assumehaveparentrevisions=assumehaveparentrevisions,
                         sidedata_helpers=sidedata_helpers,
                     )
                 # Delta reuse policies accepted by ``clone()``.
                 DELTAREUSEALWAYS = b'always'
                 DELTAREUSESAMEREVS = b'samerevs'
                 DELTAREUSENEVER = b'never'
                 DELTAREUSEFULLADD = b'fulladd'
                 # Every valid value for the ``deltareuse`` argument.
                 DELTAREUSEALL = {
                     DELTAREUSEALWAYS,
                     DELTAREUSESAMEREVS,
                     DELTAREUSENEVER,
                     DELTAREUSEFULLADD,
                 }
                 def clone(
                     self,
                     tr,
                     destrevlog,
                     addrevisioncb=None,
                     deltareuse=DELTAREUSESAMEREVS,
                     forcedeltabothparents=None,
                     sidedata_helpers=None,
                 ):
                     """Copy this revlog to another, possibly with format changes.
                     The destination revlog will contain the same revisions and nodes.
                     However, it may not be bit-for-bit identical due to e.g. delta encoding
                     differences.
                     The ``deltareuse`` argument control how deltas from the existing revlog
                     are preserved in the destination revlog. The argument can have the
                     following values:
                     DELTAREUSEALWAYS
                        Deltas will always be reused (if possible), even if the destination
                        revlog would not select the same revisions for the delta. This is the
                        fastest mode of operation.
                     DELTAREUSESAMEREVS
                        Deltas will be reused if the destination revlog would pick the same
                        revisions for the delta. This mode strikes a balance between speed
                        and optimization.
                     DELTAREUSENEVER
                        Deltas will never be reused. This is the slowest mode of execution.
                        This mode can be used to recompute deltas (e.g. if the diff/delta
                        algorithm changes).
                     DELTAREUSEFULLADD
                        Revision will be re-added as if their were new content. This is
                        slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
                        eg: large file detection and handling.
                     Delta computation can be slow, so the choice of delta reuse policy can
                     significantly affect run time.
                     The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
                     two extremes. Deltas will be reused if they are appropriate. But if the
                     delta could choose a better revision, it will do so. This means if you
                     are converting a non-generaldelta revlog to a generaldelta revlog,
                     deltas will be recomputed if the delta's parent isn't a parent of the
                     revision.
                     In addition to the delta policy, the ``forcedeltabothparents``
                     argument controls whether to force compute deltas against both parents
                     for merges. By default, the current default is used.
                     See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
                     `sidedata_helpers`.
                     """
                     if deltareuse not in self.DELTAREUSEALL:
                         raise ValueError(
                             _(b'value for deltareuse invalid: %s') % deltareuse
                         )
                     if len(destrevlog):
                         raise ValueError(_(b'destination revlog is not empty'))
                     if getattr(self, 'filteredrevs', None):
                         raise ValueError(_(b'source revlog has filtered revisions'))
                     if getattr(destrevlog, 'filteredrevs', None):
                         raise ValueError(_(b'destination revlog has filtered revisions'))
                     # lazydelta and lazydeltabase controls whether to reuse a cached delta,
                     # if possible.
                     oldlazydelta = destrevlog._lazydelta
                     oldlazydeltabase = destrevlog._lazydeltabase
                     oldamd = destrevlog._deltabothparents
                     try:
                         if deltareuse == self.DELTAREUSEALWAYS:
                             destrevlog._lazydeltabase = True
                             destrevlog._lazydelta = True
                         elif deltareuse == self.DELTAREUSESAMEREVS:
                             destrevlog._lazydeltabase = False
                             destrevlog._lazydelta = True
                         elif deltareuse == self.DELTAREUSENEVER:
                             destrevlog._lazydeltabase = False
                             destrevlog._lazydelta = False
                         destrevlog._deltabothparents = forcedeltabothparents or oldamd
                         self._clone(
                             tr,
                             destrevlog,
                             addrevisioncb,
                             deltareuse,
                             forcedeltabothparents,
                             sidedata_helpers,
                         )
                     finally:
                         destrevlog._lazydelta = oldlazydelta
                         destrevlog._lazydeltabase = oldlazydeltabase
                         destrevlog._deltabothparents = oldamd
                 def _clone(
                     self,
                     tr,
                     destrevlog,
                     addrevisioncb,
                     deltareuse,
                     forcedeltabothparents,
                     sidedata_helpers,
                 ):
                     """Perform the core duty of `revlog.clone` after parameter processing.

                     Every revision of this revlog is copied, in order, into
                     ``destrevlog`` within transaction ``tr``:

                     - with ``DELTAREUSEFULLADD`` the full text is re-added through
                       ``destrevlog.addrevision``;
                     - otherwise the revision goes through ``destrevlog._addrevision``,
                       handing over the stored delta when ``destrevlog._lazydelta`` is
                       set and the revision is stored as a delta.

                     When ``sidedata_helpers`` is not None the helpers are run for each
                     revision; the flags they ask to add are set and the flags they ask
                     to remove are cleared on the copied revision.

                     ``addrevisioncb``, when set, is called as ``cb(self, rev, node)``
                     after each revision is copied. ``forcedeltabothparents`` is not
                     used here.
                     """
                     deltacomputer = deltautil.deltacomputer(destrevlog)
                     index = self.index
                     for rev in self:
                         entry = index[rev]

                         # Some classes override linkrev to take filtered revs into
                         # account. Use raw entry from index.
                         flags = entry[0] & 0xFFFF
                         linkrev = entry[4]
                         p1 = index[entry[5]][7]
                         p2 = index[entry[6]][7]
                         node = entry[7]

                         # (Possibly) reuse the delta from the revlog if allowed and
                         # the revlog chunk is a delta.
                         cachedelta = None
                         rawtext = None
                         if deltareuse == self.DELTAREUSEFULLADD:
                             text, sidedata = self._revisiondata(rev)

                             if sidedata_helpers is not None:
                                 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                                     self, sidedata_helpers, sidedata, rev
                                 )
                                 # ``&`` binds tighter than ``|``: the parentheses make
                                 # sure the flags to remove are cleared from the
                                 # combined set, not only from the flags to add.
                                 flags = (flags | new_flags[0]) & ~new_flags[1]

                             destrevlog.addrevision(
                                 text,
                                 tr,
                                 linkrev,
                                 p1,
                                 p2,
                                 cachedelta=cachedelta,
                                 node=node,
                                 flags=flags,
                                 deltacomputer=deltacomputer,
                                 sidedata=sidedata,
                             )
                         else:
                             if destrevlog._lazydelta:
                                 dp = self.deltaparent(rev)
                                 if dp != nullrev:
                                     cachedelta = (dp, bytes(self._chunk(rev)))

                             sidedata = None
                             if not cachedelta:
                                 rawtext, sidedata = self._revisiondata(rev)
                             if sidedata is None:
                                 # the delta was reused, so sidedata was not read yet
                                 sidedata = self.sidedata(rev)

                             if sidedata_helpers is not None:
                                 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                                     self, sidedata_helpers, sidedata, rev
                                 )
                                 # see above: add first, then clear the removed flags
                                 flags = (flags | new_flags[0]) & ~new_flags[1]

                             with destrevlog._writing(tr):
                                 destrevlog._addrevision(
                                     node,
                                     rawtext,
                                     tr,
                                     linkrev,
                                     p1,
                                     p2,
                                     flags,
                                     cachedelta,
                                     deltacomputer=deltacomputer,
                                     sidedata=sidedata,
                                 )

                         if addrevisioncb:
                             addrevisioncb(self, rev, node)
                 def censorrevision(self, tr, censornode, tombstone=b''):
                     """Replace the content of ``censornode`` with a censor tombstone.

                     A temporary revlog (postfix ``tmpcensored``) is filled with a copy
                     of every revision, the censored one being stored as the packed
                     ``tombstone`` flagged ``REVIDX_ISCENSORED``. The current files are
                     then registered as backups on ``tr`` and replaced by the temporary
                     ones, and the in-memory state is reloaded.

                     Raises ``error.RevlogError`` for version 0 revlogs and
                     ``error.Abort`` when the tombstone is longer than the censored
                     data or when a censored revision ends up stored as a delta.
                     """
                     if self._format_version == REVLOGV0:
                         raise error.RevlogError(
                             _(b'cannot censor with version %d revlogs')
                             % self._format_version
                         )
                     censorrev = self.rev(censornode)
                     # wrap the user supplied message in a metadata header
                     tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
                     if len(tombstone) > self.rawsize(censorrev):
                         raise error.Abort(
                             _(b'censor tombstone must be no longer than censored data')
                         )
                     # Rewriting the revlog in place is hard. Our strategy for censoring is
                     # to create a new revlog, copy all revisions to it, then replace the
                     # revlogs on transaction close.
                     #
                     # This is a bit dangerous. We could easily have a mismatch of state.
                     newrl = revlog(
                         self.opener,
                         target=self.target,
                         radix=self.radix,
                         postfix=b'tmpcensored',
                         censorable=True,
                     )
                     # make the temporary revlog use the same on-disk format as this one
                     newrl._format_version = self._format_version
                     newrl._format_flags = self._format_flags
                     newrl._generaldelta = self._generaldelta
                     newrl._parse_index = self._parse_index
                     for rev in self.revs():
                         node = self.node(rev)
                         p1, p2 = self.parents(node)
                         if rev == censorrev:
                             # store the tombstone in place of the original content
                             newrl.addrawrevision(
                                 tombstone,
                                 tr,
                                 self.linkrev(censorrev),
                                 p1,
                                 p2,
                                 censornode,
                                 REVIDX_ISCENSORED,
                             )
                             # the tombstone must have been stored as a full snapshot
                             if newrl.deltaparent(rev) != nullrev:
                                 raise error.Abort(
                                     _(
                                         b'censored revision stored as delta; '
                                         b'cannot censor'
                                     ),
                                     hint=_(
                                         b'censoring of revlogs is not '
                                         b'fully implemented; please report '
                                         b'this bug'
                                     ),
                                 )
                             continue
                         if self.iscensored(rev):
                             # previously censored revision: copy its stored chunk
                             # as-is, which is only possible for a full snapshot
                             if self.deltaparent(rev) != nullrev:
                                 raise error.Abort(
                                     _(
                                         b'cannot censor due to censored '
                                         b'revision having delta stored'
                                     )
                                 )
                             rawtext = self._chunk(rev)
                         else:
                             rawtext = self.rawdata(rev)
                         newrl.addrawrevision(
                             rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
                         )
                     # register backups of the current files with the transaction
                     # before they get overwritten
                     tr.addbackup(self._indexfile, location=b'store')
                     if not self._inline:
                         tr.addbackup(self._datafile, location=b'store')
                     # move the rewritten files over the current ones
                     self.opener.rename(newrl._indexfile, self._indexfile)
                     if not self._inline:
                         self.opener.rename(newrl._datafile, self._datafile)
                     # drop cached state and reload the index from the new files
                     self.clearcaches()
                     self._loadindex()
                 def verifyintegrity(self, state):
                     """Verifies the integrity of the revlog.
                     Yields ``revlogproblem`` instances describing problems that are
                     found.

                     ``state`` is a dict shared with the caller. This method reads
                     ``state[b'expectedversion']``, ``state[b'erroroncensored']`` and
                     the optional ``state[b'skipflags']``; it resets
                     ``state[b'skipread']`` and ``state[b'safe_renamed']`` to empty
                     sets and adds to ``skipread`` the nodes whose content could not be
                     checked.
                     """
                     # file size sanity check first
                     dd, di = self.checksize()
                     if dd:
                         yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
                     if di:
                         yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
                     version = self._format_version
                     # The verifier tells us what version revlog we should be.
                     if version != state[b'expectedversion']:
                         yield revlogproblem(
                             warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                             % (self.display_id, version, state[b'expectedversion'])
                         )
                     state[b'skipread'] = set()
                     state[b'safe_renamed'] = set()
                     for rev in self:
                         node = self.node(rev)
                         # Verify contents. 4 cases to care about:
                         #
                         #   common: the most common case
                         #   rename: with a rename
                         #   meta: file content starts with b'\1\n', the metadata
                         #         header defined in filelog.py, but without a rename
                         #   ext: content stored externally
                         #
                         # More formally, their differences are shown below:
                         #
                         #                       | common | rename | meta  | ext
                         #  -------------------------------------------------------
                         #   flags()             | 0      | 0      | 0     | not 0
                         #   renamed()           | False  | True   | False | ?
                         #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
                         #
                         # "rawtext" means the raw text stored in revlog data, which
                         # could be retrieved by "rawdata(rev)". "text"
                         # mentioned below is "revision(rev)".
                         #
                         # There are 3 different lengths stored physically:
                         #  1. L1: rawsize, stored in revlog index
                         #  2. L2: len(rawtext), stored in revlog data
                         #  3. L3: len(text), stored in revlog data if flags==0, or
                         #     possibly somewhere else if flags!=0
                         #
                         # L1 should be equal to L2. L3 could be different from them.
                         # "text" may or may not affect commit hash depending on flag
                         # processors (see flagutil.addflagprocessor).
                         #
                         #              | common  | rename | meta  | ext
                         # -------------------------------------------------
                         #    rawsize() | L1      | L1     | L1    | L1
                         #       size() | L1      | L2-LM  | L1(*) | L1 (?)
                         # len(rawtext) | L2      | L2     | L2    | L2
                         #    len(text) | L2      | L2     | L2    | L3
                         #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
                         #
                         # LM:  length of metadata, depending on rawtext
                         # (*): not ideal, see comment in filelog.size
                         # (?): could be "- len(meta)" if the resolved content has
                         #      rename metadata
                         #
                         # Checks needed to be done:
                         #  1. length check: L1 == L2, in all cases.
                         #  2. hash check: depending on flag processor, we may need to
                         #     use either "text" (external), or "rawtext" (in revlog).
                         try:
                             # restrict the caller supplied skipflags to the flags
                             # actually set on this revision
                             skipflags = state.get(b'skipflags', 0)
                             if skipflags:
                                 skipflags &= self.flags(rev)
                             _verify_revision(self, skipflags, state, node)
                             # length check: L1 (index) against L2 (stored raw text)
                             l1 = self.rawsize(rev)
                             l2 = len(self.rawdata(node))
                             if l1 != l2:
                                 yield revlogproblem(
                                     error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                                     node=node,
                                 )
                         except error.CensoredNodeError:
                             # censored content is only a problem when the caller
                             # asked for it to be reported
                             if state[b'erroroncensored']:
                                 yield revlogproblem(
                                     error=_(b'censored file data'), node=node
                                 )
                                 state[b'skipread'].add(node)
                         except Exception as e:
                             # any other failure is reported as a problem for this node
                             # rather than aborting the whole verification
                             yield revlogproblem(
                                 error=_(b'unpacking %s: %s')
                                 % (short(node), stringutil.forcebytestr(e)),
                                 node=node,
                             )
                             state[b'skipread'].add(node)
                 def storageinfo(
                     self,
                     exclusivefiles=False,
                     sharedfiles=False,
                     revisionscount=False,
                     trackedsize=False,
                     storedsize=False,
                 ):
                     """Return a dict with the requested pieces of storage information.

                     Each truthy argument adds the bytes key of the same name:

                     - ``exclusivefiles``: ``(opener, path)`` pairs for the index file
                       and, unless the revlog is inline, the data file
                     - ``sharedfiles``: always an empty list
                     - ``revisionscount``: number of revisions
                     - ``trackedsize``: sum of ``rawsize()`` over all revisions
                     - ``storedsize``: sum of the on-disk size of ``files()``
                     """
                     info = {}

                     if exclusivefiles:
                         owned = [(self.opener, self._indexfile)]
                         if not self._inline:
                             owned.append((self.opener, self._datafile))
                         info[b'exclusivefiles'] = owned

                     if sharedfiles:
                         info[b'sharedfiles'] = []

                     if revisionscount:
                         info[b'revisionscount'] = len(self)

                     if trackedsize:
                         info[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

                     if storedsize:
                         total = 0
                         for path in self.files():
                             total += self.opener.stat(path).st_size
                         info[b'storedsize'] = total

                     return info
                 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
                     """Compute and store sidedata for revisions startrev..endrev.

                     For every revision in the inclusive range, the sidedata helpers
                     are run (starting from an empty sidedata mapping), the result is
                     serialized, optionally compressed, and appended to the data file.
                     The matching index entries are then updated in memory and
                     rewritten in the index file.

                     `transaction` is handed to `self._writing()`.

                     `helpers` is passed unchanged to
                     `sidedatautil.run_sidedata_helpers`; items 1 and 2 are
                     presumably the sidedata computers and removers (TODO confirm
                     against the caller). If both are empty this is a no-op.

                     Nothing is done when the revlog has no sidedata support.

                     Raises error.Abort if one of the revisions already carries
                     sidedata, since rewriting it would leave garbage in the revlog.
                     """
                     if not self.hassidedata:
                         return
                     # revlog formats with sidedata support do not support inline
                     assert not self._inline
                     if not helpers[1] and not helpers[2]:
                         # Nothing to generate or remove
                         return

                     new_entries = []
                     # append the new sidedata
                     with self._writing(transaction):
                         ifh, dfh = self._writinghandles
                         # Position the data file at the logical end of the data: the
                         # docket tracks it when there is one, otherwise use the
                         # physical end of the file.
                         if self._docket is not None:
                             dfh.seek(self._docket.data_end, os.SEEK_SET)
                         else:
                             dfh.seek(0, os.SEEK_END)

                         current_offset = dfh.tell()
                         for rev in range(startrev, endrev + 1):
                             entry = self.index[rev]
                             new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                                 store=self,
                                 sidedata_helpers=helpers,
                                 sidedata={},
                                 rev=rev,
                             )

                             serialized_sidedata = sidedatautil.serialize_sidedata(
                                 new_sidedata
                             )

                             sidedata_compression_mode = COMP_MODE_INLINE
                             if serialized_sidedata and self.hassidedata:
                                 sidedata_compression_mode = COMP_MODE_PLAIN
                                 h, comp_sidedata = self.compress(serialized_sidedata)
                                 # Slice instead of indexing: on Python 3, indexing
                                 # bytes yields an int, which never compares equal to
                                 # a bytes object.
                                 comp_header = comp_sidedata[0:1]
                                 if (
                                     h != b'u'
                                     and comp_header != b'\0'
                                     and len(comp_sidedata) < len(serialized_sidedata)
                                 ):
                                     # Compression helped; the chunk carries its own
                                     # header, so no separate header is expected.
                                     assert not h
                                     if (
                                         comp_header
                                         == self._docket.default_compression_header
                                     ):
                                         sidedata_compression_mode = COMP_MODE_DEFAULT
                                     else:
                                         sidedata_compression_mode = COMP_MODE_INLINE
                                     serialized_sidedata = comp_sidedata
                             if entry[8] != 0 or entry[9] != 0:
                                 # rewriting entries that already have sidedata is not
                                 # supported yet, because it introduces garbage data in the
                                 # revlog.
                                 msg = b"rewriting existing sidedata is not supported yet"
                                 raise error.Abort(msg)

                             # Apply (potential) flags to add and to remove after running
                             # the sidedata helpers. `&` binds tighter than `|`, so the
                             # parentheses are required for the removal mask to also
                             # clear flags that are already set on the entry.
                             new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
                             entry_update = (
                                 current_offset,
                                 len(serialized_sidedata),
                                 new_offset_flags,
                                 sidedata_compression_mode,
                             )

                             # the sidedata computation might have moved the file cursors
                             # around
                             dfh.seek(current_offset, os.SEEK_SET)
                             dfh.write(serialized_sidedata)
                             new_entries.append(entry_update)
                             current_offset += len(serialized_sidedata)
                             if self._docket is not None:
                                 self._docket.data_end = dfh.tell()

                         # rewrite the new index entries
                         ifh.seek(startrev * self.index.entry_size)
                         for i, e in enumerate(new_entries):
                             rev = startrev + i
                             self.index.replace_sidedata_info(rev, *e)
                             packed = self.index.entry_binary(rev)
                             if rev == 0 and self._docket is None:
                                 # Without a docket, the first index entry is written
                                 # with the packed format header prepended.
                                 header = self._format_flags | self._format_version
                                 header = self.index.pack_header(header)
                                 packed = header + packed
                             ifh.write(packed)