# Source: mercurial/revlog.py at changeset r40188:adbf8ca2 (default branch).
# Commit by Yuya Nishihara:
#   "revlog: optimize ancestors() to not check filtered revisions for each"
# Diff context: file spans lines 1-2535 (old) / 1-2544 (new).
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import os
19 import os
20 import struct
20 import struct
21 import zlib
21 import zlib
22
22
23 # import stuff from node for others to import from revlog
23 # import stuff from node for others to import from revlog
24 from .node import (
24 from .node import (
25 bin,
25 bin,
26 hex,
26 hex,
27 nullhex,
27 nullhex,
28 nullid,
28 nullid,
29 nullrev,
29 nullrev,
30 short,
30 short,
31 wdirfilenodeids,
31 wdirfilenodeids,
32 wdirhex,
32 wdirhex,
33 wdirid,
33 wdirid,
34 wdirrev,
34 wdirrev,
35 )
35 )
36 from .i18n import _
36 from .i18n import _
37 from .revlogutils.constants import (
37 from .revlogutils.constants import (
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 REVIDX_DEFAULT_FLAGS,
40 REVIDX_DEFAULT_FLAGS,
41 REVIDX_ELLIPSIS,
41 REVIDX_ELLIPSIS,
42 REVIDX_EXTSTORED,
42 REVIDX_EXTSTORED,
43 REVIDX_FLAGS_ORDER,
43 REVIDX_FLAGS_ORDER,
44 REVIDX_ISCENSORED,
44 REVIDX_ISCENSORED,
45 REVIDX_KNOWN_FLAGS,
45 REVIDX_KNOWN_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 )
55 )
56 from .thirdparty import (
56 from .thirdparty import (
57 attr,
57 attr,
58 )
58 )
59 from . import (
59 from . import (
60 ancestor,
60 ancestor,
61 dagop,
61 dagop,
62 error,
62 error,
63 mdiff,
63 mdiff,
64 policy,
64 policy,
65 pycompat,
65 pycompat,
66 repository,
66 repository,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .revlogutils import (
70 from .revlogutils import (
71 deltas as deltautil,
71 deltas as deltautil,
72 )
72 )
73 from .utils import (
73 from .utils import (
74 interfaceutil,
74 interfaceutil,
75 storageutil,
75 storageutil,
76 stringutil,
76 stringutil,
77 )
77 )
78
78
79 # blanked usage of all the name to prevent pyflakes constraints
79 # blanked usage of all the name to prevent pyflakes constraints
80 # We need these name available in the module for extensions.
80 # We need these name available in the module for extensions.
81 REVLOGV0
81 REVLOGV0
82 REVLOGV1
82 REVLOGV1
83 REVLOGV2
83 REVLOGV2
84 FLAG_INLINE_DATA
84 FLAG_INLINE_DATA
85 FLAG_GENERALDELTA
85 FLAG_GENERALDELTA
86 REVLOG_DEFAULT_FLAGS
86 REVLOG_DEFAULT_FLAGS
87 REVLOG_DEFAULT_FORMAT
87 REVLOG_DEFAULT_FORMAT
88 REVLOG_DEFAULT_VERSION
88 REVLOG_DEFAULT_VERSION
89 REVLOGV1_FLAGS
89 REVLOGV1_FLAGS
90 REVLOGV2_FLAGS
90 REVLOGV2_FLAGS
91 REVIDX_ISCENSORED
91 REVIDX_ISCENSORED
92 REVIDX_ELLIPSIS
92 REVIDX_ELLIPSIS
93 REVIDX_EXTSTORED
93 REVIDX_EXTSTORED
94 REVIDX_DEFAULT_FLAGS
94 REVIDX_DEFAULT_FLAGS
95 REVIDX_FLAGS_ORDER
95 REVIDX_FLAGS_ORDER
96 REVIDX_KNOWN_FLAGS
96 REVIDX_KNOWN_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
98
98
99 parsers = policy.importmod(r'parsers')
99 parsers = policy.importmod(r'parsers')
100
100
101 # Aliased for performance.
101 # Aliased for performance.
102 _zlibdecompress = zlib.decompress
102 _zlibdecompress = zlib.decompress
103
103
104 # max size of revlog with inline data
104 # max size of revlog with inline data
105 _maxinline = 131072
105 _maxinline = 131072
106 _chunksize = 1048576
106 _chunksize = 1048576
107
107
108 # Store flag processors (cf. 'addflagprocessor()' to register)
108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 _flagprocessors = {
109 _flagprocessors = {
110 REVIDX_ISCENSORED: None,
110 REVIDX_ISCENSORED: None,
111 }
111 }
112
112
113 # Flag processors for REVIDX_ELLIPSIS.
113 # Flag processors for REVIDX_ELLIPSIS.
114 def ellipsisreadprocessor(rl, text):
114 def ellipsisreadprocessor(rl, text):
115 return text, False
115 return text, False
116
116
117 def ellipsiswriteprocessor(rl, text):
117 def ellipsiswriteprocessor(rl, text):
118 return text, False
118 return text, False
119
119
120 def ellipsisrawprocessor(rl, text):
120 def ellipsisrawprocessor(rl, text):
121 return False
121 return False
122
122
123 ellipsisprocessor = (
123 ellipsisprocessor = (
124 ellipsisreadprocessor,
124 ellipsisreadprocessor,
125 ellipsiswriteprocessor,
125 ellipsiswriteprocessor,
126 ellipsisrawprocessor,
126 ellipsisrawprocessor,
127 )
127 )
128
128
129 def addflagprocessor(flag, processor):
129 def addflagprocessor(flag, processor):
130 """Register a flag processor on a revision data flag.
130 """Register a flag processor on a revision data flag.
131
131
132 Invariant:
132 Invariant:
133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 - Only one flag processor can be registered on a specific flag.
135 - Only one flag processor can be registered on a specific flag.
136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 following signatures:
137 following signatures:
138 - (read) f(self, rawtext) -> text, bool
138 - (read) f(self, rawtext) -> text, bool
139 - (write) f(self, text) -> rawtext, bool
139 - (write) f(self, text) -> rawtext, bool
140 - (raw) f(self, rawtext) -> bool
140 - (raw) f(self, rawtext) -> bool
141 "text" is presented to the user. "rawtext" is stored in revlog data, not
141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 directly visible to the user.
142 directly visible to the user.
143 The boolean returned by these transforms is used to determine whether
143 The boolean returned by these transforms is used to determine whether
144 the returned text can be used for hash integrity checking. For example,
144 the returned text can be used for hash integrity checking. For example,
145 if "write" returns False, then "text" is used to generate hash. If
145 if "write" returns False, then "text" is used to generate hash. If
146 "write" returns True, that basically means "rawtext" returned by "write"
146 "write" returns True, that basically means "rawtext" returned by "write"
147 should be used to generate hash. Usually, "write" and "read" return
147 should be used to generate hash. Usually, "write" and "read" return
148 different booleans. And "raw" returns a same boolean as "write".
148 different booleans. And "raw" returns a same boolean as "write".
149
149
150 Note: The 'raw' transform is used for changegroup generation and in some
150 Note: The 'raw' transform is used for changegroup generation and in some
151 debug commands. In this case the transform only indicates whether the
151 debug commands. In this case the transform only indicates whether the
152 contents can be used for hash integrity checks.
152 contents can be used for hash integrity checks.
153 """
153 """
154 if not flag & REVIDX_KNOWN_FLAGS:
154 if not flag & REVIDX_KNOWN_FLAGS:
155 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
155 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
156 raise error.ProgrammingError(msg)
156 raise error.ProgrammingError(msg)
157 if flag not in REVIDX_FLAGS_ORDER:
157 if flag not in REVIDX_FLAGS_ORDER:
158 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
158 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
159 raise error.ProgrammingError(msg)
159 raise error.ProgrammingError(msg)
160 if flag in _flagprocessors:
160 if flag in _flagprocessors:
161 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
161 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
162 raise error.Abort(msg)
162 raise error.Abort(msg)
163 _flagprocessors[flag] = processor
163 _flagprocessors[flag] = processor
164
164
165 def getoffset(q):
165 def getoffset(q):
166 return int(q >> 16)
166 return int(q >> 16)
167
167
168 def gettype(q):
168 def gettype(q):
169 return int(q & 0xFFFF)
169 return int(q & 0xFFFF)
170
170
171 def offset_type(offset, type):
171 def offset_type(offset, type):
172 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
172 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
173 raise ValueError('unknown revlog index flags')
173 raise ValueError('unknown revlog index flags')
174 return int(int(offset) << 16 | type)
174 return int(int(offset) << 16 | type)
175
175
176 @attr.s(slots=True, frozen=True)
176 @attr.s(slots=True, frozen=True)
177 class _revisioninfo(object):
177 class _revisioninfo(object):
178 """Information about a revision that allows building its fulltext
178 """Information about a revision that allows building its fulltext
179 node: expected hash of the revision
179 node: expected hash of the revision
180 p1, p2: parent revs of the revision
180 p1, p2: parent revs of the revision
181 btext: built text cache consisting of a one-element list
181 btext: built text cache consisting of a one-element list
182 cachedelta: (baserev, uncompressed_delta) or None
182 cachedelta: (baserev, uncompressed_delta) or None
183 flags: flags associated to the revision storage
183 flags: flags associated to the revision storage
184
184
185 One of btext[0] or cachedelta must be set.
185 One of btext[0] or cachedelta must be set.
186 """
186 """
187 node = attr.ib()
187 node = attr.ib()
188 p1 = attr.ib()
188 p1 = attr.ib()
189 p2 = attr.ib()
189 p2 = attr.ib()
190 btext = attr.ib()
190 btext = attr.ib()
191 textlen = attr.ib()
191 textlen = attr.ib()
192 cachedelta = attr.ib()
192 cachedelta = attr.ib()
193 flags = attr.ib()
193 flags = attr.ib()
194
194
195 @interfaceutil.implementer(repository.irevisiondelta)
195 @interfaceutil.implementer(repository.irevisiondelta)
196 @attr.s(slots=True)
196 @attr.s(slots=True)
197 class revlogrevisiondelta(object):
197 class revlogrevisiondelta(object):
198 node = attr.ib()
198 node = attr.ib()
199 p1node = attr.ib()
199 p1node = attr.ib()
200 p2node = attr.ib()
200 p2node = attr.ib()
201 basenode = attr.ib()
201 basenode = attr.ib()
202 flags = attr.ib()
202 flags = attr.ib()
203 baserevisionsize = attr.ib()
203 baserevisionsize = attr.ib()
204 revision = attr.ib()
204 revision = attr.ib()
205 delta = attr.ib()
205 delta = attr.ib()
206 linknode = attr.ib(default=None)
206 linknode = attr.ib(default=None)
207
207
208 @interfaceutil.implementer(repository.iverifyproblem)
208 @interfaceutil.implementer(repository.iverifyproblem)
209 @attr.s(frozen=True)
209 @attr.s(frozen=True)
210 class revlogproblem(object):
210 class revlogproblem(object):
211 warning = attr.ib(default=None)
211 warning = attr.ib(default=None)
212 error = attr.ib(default=None)
212 error = attr.ib(default=None)
213 node = attr.ib(default=None)
213 node = attr.ib(default=None)
214
214
215 # index v0:
215 # index v0:
216 # 4 bytes: offset
216 # 4 bytes: offset
217 # 4 bytes: compressed length
217 # 4 bytes: compressed length
218 # 4 bytes: base rev
218 # 4 bytes: base rev
219 # 4 bytes: link rev
219 # 4 bytes: link rev
220 # 20 bytes: parent 1 nodeid
220 # 20 bytes: parent 1 nodeid
221 # 20 bytes: parent 2 nodeid
221 # 20 bytes: parent 2 nodeid
222 # 20 bytes: nodeid
222 # 20 bytes: nodeid
223 indexformatv0 = struct.Struct(">4l20s20s20s")
223 indexformatv0 = struct.Struct(">4l20s20s20s")
224 indexformatv0_pack = indexformatv0.pack
224 indexformatv0_pack = indexformatv0.pack
225 indexformatv0_unpack = indexformatv0.unpack
225 indexformatv0_unpack = indexformatv0.unpack
226
226
227 class revlogoldindex(list):
227 class revlogoldindex(list):
228 def __getitem__(self, i):
228 def __getitem__(self, i):
229 if i == -1:
229 if i == -1:
230 return (0, 0, 0, -1, -1, -1, -1, nullid)
230 return (0, 0, 0, -1, -1, -1, -1, nullid)
231 return list.__getitem__(self, i)
231 return list.__getitem__(self, i)
232
232
233 class revlogoldio(object):
233 class revlogoldio(object):
234 def __init__(self):
234 def __init__(self):
235 self.size = indexformatv0.size
235 self.size = indexformatv0.size
236
236
237 def parseindex(self, data, inline):
237 def parseindex(self, data, inline):
238 s = self.size
238 s = self.size
239 index = []
239 index = []
240 nodemap = {nullid: nullrev}
240 nodemap = {nullid: nullrev}
241 n = off = 0
241 n = off = 0
242 l = len(data)
242 l = len(data)
243 while off + s <= l:
243 while off + s <= l:
244 cur = data[off:off + s]
244 cur = data[off:off + s]
245 off += s
245 off += s
246 e = indexformatv0_unpack(cur)
246 e = indexformatv0_unpack(cur)
247 # transform to revlogv1 format
247 # transform to revlogv1 format
248 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
248 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
249 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
249 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
250 index.append(e2)
250 index.append(e2)
251 nodemap[e[6]] = n
251 nodemap[e[6]] = n
252 n += 1
252 n += 1
253
253
254 return revlogoldindex(index), nodemap, None
254 return revlogoldindex(index), nodemap, None
255
255
256 def packentry(self, entry, node, version, rev):
256 def packentry(self, entry, node, version, rev):
257 if gettype(entry[0]):
257 if gettype(entry[0]):
258 raise error.RevlogError(_('index entry flags need revlog '
258 raise error.RevlogError(_('index entry flags need revlog '
259 'version 1'))
259 'version 1'))
260 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
260 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
261 node(entry[5]), node(entry[6]), entry[7])
261 node(entry[5]), node(entry[6]), entry[7])
262 return indexformatv0_pack(*e2)
262 return indexformatv0_pack(*e2)
263
263
264 # index ng:
264 # index ng:
265 # 6 bytes: offset
265 # 6 bytes: offset
266 # 2 bytes: flags
266 # 2 bytes: flags
267 # 4 bytes: compressed length
267 # 4 bytes: compressed length
268 # 4 bytes: uncompressed length
268 # 4 bytes: uncompressed length
269 # 4 bytes: base rev
269 # 4 bytes: base rev
270 # 4 bytes: link rev
270 # 4 bytes: link rev
271 # 4 bytes: parent 1 rev
271 # 4 bytes: parent 1 rev
272 # 4 bytes: parent 2 rev
272 # 4 bytes: parent 2 rev
273 # 32 bytes: nodeid
273 # 32 bytes: nodeid
274 indexformatng = struct.Struct(">Qiiiiii20s12x")
274 indexformatng = struct.Struct(">Qiiiiii20s12x")
275 indexformatng_pack = indexformatng.pack
275 indexformatng_pack = indexformatng.pack
276 versionformat = struct.Struct(">I")
276 versionformat = struct.Struct(">I")
277 versionformat_pack = versionformat.pack
277 versionformat_pack = versionformat.pack
278 versionformat_unpack = versionformat.unpack
278 versionformat_unpack = versionformat.unpack
279
279
280 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
280 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
281 # signed integer)
281 # signed integer)
282 _maxentrysize = 0x7fffffff
282 _maxentrysize = 0x7fffffff
283
283
284 class revlogio(object):
284 class revlogio(object):
285 def __init__(self):
285 def __init__(self):
286 self.size = indexformatng.size
286 self.size = indexformatng.size
287
287
288 def parseindex(self, data, inline):
288 def parseindex(self, data, inline):
289 # call the C implementation to parse the index data
289 # call the C implementation to parse the index data
290 index, cache = parsers.parse_index2(data, inline)
290 index, cache = parsers.parse_index2(data, inline)
291 return index, getattr(index, 'nodemap', None), cache
291 return index, getattr(index, 'nodemap', None), cache
292
292
293 def packentry(self, entry, node, version, rev):
293 def packentry(self, entry, node, version, rev):
294 p = indexformatng_pack(*entry)
294 p = indexformatng_pack(*entry)
295 if rev == 0:
295 if rev == 0:
296 p = versionformat_pack(version) + p[4:]
296 p = versionformat_pack(version) + p[4:]
297 return p
297 return p
298
298
299 class revlog(object):
299 class revlog(object):
300 """
300 """
301 the underlying revision storage object
301 the underlying revision storage object
302
302
303 A revlog consists of two parts, an index and the revision data.
303 A revlog consists of two parts, an index and the revision data.
304
304
305 The index is a file with a fixed record size containing
305 The index is a file with a fixed record size containing
306 information on each revision, including its nodeid (hash), the
306 information on each revision, including its nodeid (hash), the
307 nodeids of its parents, the position and offset of its data within
307 nodeids of its parents, the position and offset of its data within
308 the data file, and the revision it's based on. Finally, each entry
308 the data file, and the revision it's based on. Finally, each entry
309 contains a linkrev entry that can serve as a pointer to external
309 contains a linkrev entry that can serve as a pointer to external
310 data.
310 data.
311
311
312 The revision data itself is a linear collection of data chunks.
312 The revision data itself is a linear collection of data chunks.
313 Each chunk represents a revision and is usually represented as a
313 Each chunk represents a revision and is usually represented as a
314 delta against the previous chunk. To bound lookup time, runs of
314 delta against the previous chunk. To bound lookup time, runs of
315 deltas are limited to about 2 times the length of the original
315 deltas are limited to about 2 times the length of the original
316 version data. This makes retrieval of a version proportional to
316 version data. This makes retrieval of a version proportional to
317 its size, or O(1) relative to the number of revisions.
317 its size, or O(1) relative to the number of revisions.
318
318
319 Both pieces of the revlog are written to in an append-only
319 Both pieces of the revlog are written to in an append-only
320 fashion, which means we never need to rewrite a file to insert or
320 fashion, which means we never need to rewrite a file to insert or
321 remove data, and can use some simple techniques to avoid the need
321 remove data, and can use some simple techniques to avoid the need
322 for locking while reading.
322 for locking while reading.
323
323
324 If checkambig, indexfile is opened with checkambig=True at
324 If checkambig, indexfile is opened with checkambig=True at
325 writing, to avoid file stat ambiguity.
325 writing, to avoid file stat ambiguity.
326
326
327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
328 index will be mmapped rather than read if it is larger than the
328 index will be mmapped rather than read if it is larger than the
329 configured threshold.
329 configured threshold.
330
330
331 If censorable is True, the revlog can have censored revisions.
331 If censorable is True, the revlog can have censored revisions.
332 """
332 """
333 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
333 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
334 mmaplargeindex=False, censorable=False):
334 mmaplargeindex=False, censorable=False):
335 """
335 """
336 create a revlog object
336 create a revlog object
337
337
338 opener is a function that abstracts the file opening operation
338 opener is a function that abstracts the file opening operation
339 and can be used to implement COW semantics or the like.
339 and can be used to implement COW semantics or the like.
340 """
340 """
341 self.indexfile = indexfile
341 self.indexfile = indexfile
342 self.datafile = datafile or (indexfile[:-2] + ".d")
342 self.datafile = datafile or (indexfile[:-2] + ".d")
343 self.opener = opener
343 self.opener = opener
344 # When True, indexfile is opened with checkambig=True at writing, to
344 # When True, indexfile is opened with checkambig=True at writing, to
345 # avoid file stat ambiguity.
345 # avoid file stat ambiguity.
346 self._checkambig = checkambig
346 self._checkambig = checkambig
347 self._censorable = censorable
347 self._censorable = censorable
348 # 3-tuple of (node, rev, text) for a raw revision.
348 # 3-tuple of (node, rev, text) for a raw revision.
349 self._revisioncache = None
349 self._revisioncache = None
350 # Maps rev to chain base rev.
350 # Maps rev to chain base rev.
351 self._chainbasecache = util.lrucachedict(100)
351 self._chainbasecache = util.lrucachedict(100)
352 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
352 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
353 self._chunkcache = (0, '')
353 self._chunkcache = (0, '')
354 # How much data to read and cache into the raw revlog data cache.
354 # How much data to read and cache into the raw revlog data cache.
355 self._chunkcachesize = 65536
355 self._chunkcachesize = 65536
356 self._maxchainlen = None
356 self._maxchainlen = None
357 self._deltabothparents = True
357 self._deltabothparents = True
358 self.index = []
358 self.index = []
359 # Mapping of partial identifiers to full nodes.
359 # Mapping of partial identifiers to full nodes.
360 self._pcache = {}
360 self._pcache = {}
361 # Mapping of revision integer to full node.
361 # Mapping of revision integer to full node.
362 self._nodecache = {nullid: nullrev}
362 self._nodecache = {nullid: nullrev}
363 self._nodepos = None
363 self._nodepos = None
364 self._compengine = 'zlib'
364 self._compengine = 'zlib'
365 self._maxdeltachainspan = -1
365 self._maxdeltachainspan = -1
366 self._withsparseread = False
366 self._withsparseread = False
367 self._sparserevlog = False
367 self._sparserevlog = False
368 self._srdensitythreshold = 0.50
368 self._srdensitythreshold = 0.50
369 self._srmingapsize = 262144
369 self._srmingapsize = 262144
370
370
371 # Make copy of flag processors so each revlog instance can support
371 # Make copy of flag processors so each revlog instance can support
372 # custom flags.
372 # custom flags.
373 self._flagprocessors = dict(_flagprocessors)
373 self._flagprocessors = dict(_flagprocessors)
374
374
375 mmapindexthreshold = None
375 mmapindexthreshold = None
376 v = REVLOG_DEFAULT_VERSION
376 v = REVLOG_DEFAULT_VERSION
377 opts = getattr(opener, 'options', None)
377 opts = getattr(opener, 'options', None)
378 if opts is not None:
378 if opts is not None:
379 if 'revlogv2' in opts:
379 if 'revlogv2' in opts:
380 # version 2 revlogs always use generaldelta.
380 # version 2 revlogs always use generaldelta.
381 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
381 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
382 elif 'revlogv1' in opts:
382 elif 'revlogv1' in opts:
383 if 'generaldelta' in opts:
383 if 'generaldelta' in opts:
384 v |= FLAG_GENERALDELTA
384 v |= FLAG_GENERALDELTA
385 else:
385 else:
386 v = 0
386 v = 0
387 if 'chunkcachesize' in opts:
387 if 'chunkcachesize' in opts:
388 self._chunkcachesize = opts['chunkcachesize']
388 self._chunkcachesize = opts['chunkcachesize']
389 if 'maxchainlen' in opts:
389 if 'maxchainlen' in opts:
390 self._maxchainlen = opts['maxchainlen']
390 self._maxchainlen = opts['maxchainlen']
391 if 'deltabothparents' in opts:
391 if 'deltabothparents' in opts:
392 self._deltabothparents = opts['deltabothparents']
392 self._deltabothparents = opts['deltabothparents']
393 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
393 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
394 if 'compengine' in opts:
394 if 'compengine' in opts:
395 self._compengine = opts['compengine']
395 self._compengine = opts['compengine']
396 if 'maxdeltachainspan' in opts:
396 if 'maxdeltachainspan' in opts:
397 self._maxdeltachainspan = opts['maxdeltachainspan']
397 self._maxdeltachainspan = opts['maxdeltachainspan']
398 if mmaplargeindex and 'mmapindexthreshold' in opts:
398 if mmaplargeindex and 'mmapindexthreshold' in opts:
399 mmapindexthreshold = opts['mmapindexthreshold']
399 mmapindexthreshold = opts['mmapindexthreshold']
400 self._sparserevlog = bool(opts.get('sparse-revlog', False))
400 self._sparserevlog = bool(opts.get('sparse-revlog', False))
401 withsparseread = bool(opts.get('with-sparse-read', False))
401 withsparseread = bool(opts.get('with-sparse-read', False))
402 # sparse-revlog forces sparse-read
402 # sparse-revlog forces sparse-read
403 self._withsparseread = self._sparserevlog or withsparseread
403 self._withsparseread = self._sparserevlog or withsparseread
404 if 'sparse-read-density-threshold' in opts:
404 if 'sparse-read-density-threshold' in opts:
405 self._srdensitythreshold = opts['sparse-read-density-threshold']
405 self._srdensitythreshold = opts['sparse-read-density-threshold']
406 if 'sparse-read-min-gap-size' in opts:
406 if 'sparse-read-min-gap-size' in opts:
407 self._srmingapsize = opts['sparse-read-min-gap-size']
407 self._srmingapsize = opts['sparse-read-min-gap-size']
408 if opts.get('enableellipsis'):
408 if opts.get('enableellipsis'):
409 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
409 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
410
410
411 if self._chunkcachesize <= 0:
411 if self._chunkcachesize <= 0:
412 raise error.RevlogError(_('revlog chunk cache size %r is not '
412 raise error.RevlogError(_('revlog chunk cache size %r is not '
413 'greater than 0') % self._chunkcachesize)
413 'greater than 0') % self._chunkcachesize)
414 elif self._chunkcachesize & (self._chunkcachesize - 1):
414 elif self._chunkcachesize & (self._chunkcachesize - 1):
415 raise error.RevlogError(_('revlog chunk cache size %r is not a '
415 raise error.RevlogError(_('revlog chunk cache size %r is not a '
416 'power of 2') % self._chunkcachesize)
416 'power of 2') % self._chunkcachesize)
417
417
418 self._loadindex(v, mmapindexthreshold)
418 self._loadindex(v, mmapindexthreshold)
419
419
420 def _loadindex(self, v, mmapindexthreshold):
420 def _loadindex(self, v, mmapindexthreshold):
421 indexdata = ''
421 indexdata = ''
422 self._initempty = True
422 self._initempty = True
423 try:
423 try:
424 with self._indexfp() as f:
424 with self._indexfp() as f:
425 if (mmapindexthreshold is not None and
425 if (mmapindexthreshold is not None and
426 self.opener.fstat(f).st_size >= mmapindexthreshold):
426 self.opener.fstat(f).st_size >= mmapindexthreshold):
427 indexdata = util.buffer(util.mmapread(f))
427 indexdata = util.buffer(util.mmapread(f))
428 else:
428 else:
429 indexdata = f.read()
429 indexdata = f.read()
430 if len(indexdata) > 0:
430 if len(indexdata) > 0:
431 v = versionformat_unpack(indexdata[:4])[0]
431 v = versionformat_unpack(indexdata[:4])[0]
432 self._initempty = False
432 self._initempty = False
433 except IOError as inst:
433 except IOError as inst:
434 if inst.errno != errno.ENOENT:
434 if inst.errno != errno.ENOENT:
435 raise
435 raise
436
436
437 self.version = v
437 self.version = v
438 self._inline = v & FLAG_INLINE_DATA
438 self._inline = v & FLAG_INLINE_DATA
439 self._generaldelta = v & FLAG_GENERALDELTA
439 self._generaldelta = v & FLAG_GENERALDELTA
440 flags = v & ~0xFFFF
440 flags = v & ~0xFFFF
441 fmt = v & 0xFFFF
441 fmt = v & 0xFFFF
442 if fmt == REVLOGV0:
442 if fmt == REVLOGV0:
443 if flags:
443 if flags:
444 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
444 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
445 'revlog %s') %
445 'revlog %s') %
446 (flags >> 16, fmt, self.indexfile))
446 (flags >> 16, fmt, self.indexfile))
447 elif fmt == REVLOGV1:
447 elif fmt == REVLOGV1:
448 if flags & ~REVLOGV1_FLAGS:
448 if flags & ~REVLOGV1_FLAGS:
449 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
449 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
450 'revlog %s') %
450 'revlog %s') %
451 (flags >> 16, fmt, self.indexfile))
451 (flags >> 16, fmt, self.indexfile))
452 elif fmt == REVLOGV2:
452 elif fmt == REVLOGV2:
453 if flags & ~REVLOGV2_FLAGS:
453 if flags & ~REVLOGV2_FLAGS:
454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
455 'revlog %s') %
455 'revlog %s') %
456 (flags >> 16, fmt, self.indexfile))
456 (flags >> 16, fmt, self.indexfile))
457 else:
457 else:
458 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
458 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
459 (fmt, self.indexfile))
459 (fmt, self.indexfile))
460
460
461 self._storedeltachains = True
461 self._storedeltachains = True
462
462
463 self._io = revlogio()
463 self._io = revlogio()
464 if self.version == REVLOGV0:
464 if self.version == REVLOGV0:
465 self._io = revlogoldio()
465 self._io = revlogoldio()
466 try:
466 try:
467 d = self._io.parseindex(indexdata, self._inline)
467 d = self._io.parseindex(indexdata, self._inline)
468 except (ValueError, IndexError):
468 except (ValueError, IndexError):
469 raise error.RevlogError(_("index %s is corrupted") %
469 raise error.RevlogError(_("index %s is corrupted") %
470 self.indexfile)
470 self.indexfile)
471 self.index, nodemap, self._chunkcache = d
471 self.index, nodemap, self._chunkcache = d
472 if nodemap is not None:
472 if nodemap is not None:
473 self.nodemap = self._nodecache = nodemap
473 self.nodemap = self._nodecache = nodemap
474 if not self._chunkcache:
474 if not self._chunkcache:
475 self._chunkclear()
475 self._chunkclear()
476 # revnum -> (chain-length, sum-delta-length)
476 # revnum -> (chain-length, sum-delta-length)
477 self._chaininfocache = {}
477 self._chaininfocache = {}
478 # revlog header -> revlog compressor
478 # revlog header -> revlog compressor
479 self._decompressors = {}
479 self._decompressors = {}
480
480
481 @util.propertycache
481 @util.propertycache
482 def _compressor(self):
482 def _compressor(self):
483 return util.compengines[self._compengine].revlogcompressor()
483 return util.compengines[self._compengine].revlogcompressor()
484
484
485 def _indexfp(self, mode='r'):
485 def _indexfp(self, mode='r'):
486 """file object for the revlog's index file"""
486 """file object for the revlog's index file"""
487 args = {r'mode': mode}
487 args = {r'mode': mode}
488 if mode != 'r':
488 if mode != 'r':
489 args[r'checkambig'] = self._checkambig
489 args[r'checkambig'] = self._checkambig
490 if mode == 'w':
490 if mode == 'w':
491 args[r'atomictemp'] = True
491 args[r'atomictemp'] = True
492 return self.opener(self.indexfile, **args)
492 return self.opener(self.indexfile, **args)
493
493
494 def _datafp(self, mode='r'):
494 def _datafp(self, mode='r'):
495 """file object for the revlog's data file"""
495 """file object for the revlog's data file"""
496 return self.opener(self.datafile, mode=mode)
496 return self.opener(self.datafile, mode=mode)
497
497
@contextlib.contextmanager
def _datareadfp(self, existingfp=None):
    """file object suitable to read data"""
    if existingfp is not None:
        # The caller already holds an open file: reuse it and leave
        # closing to them.
        yield existingfp
        return
    # Inline revlogs store their data inside the index file.
    opener = self._indexfp if self._inline else self._datafp
    with opener() as fp:
        yield fp
510
510
def tip(self):
    """Return the node of the most recently added revision."""
    tiprev = len(self.index) - 1
    return self.node(tiprev)
def __contains__(self, rev):
    """True iff ``rev`` is a valid revision number in this revlog."""
    return rev >= 0 and rev < len(self)
def __len__(self):
    """Number of revisions tracked by this revlog."""
    return len(self.index)
def __iter__(self):
    """Iterate over revision numbers in ascending order."""
    return iter(pycompat.xrange(len(self)))
def revs(self, start=0, stop=None):
    """Iterate over revision numbers, from ``start`` up to optional
    ``stop`` (see storageutil.iterrevs for the exact bounds semantics)."""
    return storageutil.iterrevs(len(self), start=start, stop=stop)
522
522
@util.propertycache
def nodemap(self):
    """node -> rev mapping, populated lazily on first access."""
    if self.index:
        # Force the cache to be filled all the way down by resolving
        # the very first node. Read it straight from the index entry
        # (field 7) to get around changelog filtering.
        node0 = self.index[0][7]
        self.rev(node0)
    return self._nodecache
530
530
def hasnode(self, node):
    """Return True if ``node`` is known to this revlog."""
    try:
        self.rev(node)
    except KeyError:
        return False
    else:
        return True
537
537
def candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # A flag processor that rewrites rawtext (e.g. LFS) makes deltas
    # unsafe: two clients may hold the same node with different flags,
    # hence different rawtexts, and a delta computed against one would
    # be incompatible with the other.
    for r in (baserev, rev):
        if self.flags(r) & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return False
    return True
549
549
def clearcaches(self):
    """Drop all derived caches so they are rebuilt on demand."""
    self._revisioncache = None
    self._chainbasecache.clear()
    self._chunkcache = (0, '')
    self._pcache = {}

    try:
        # The C index object manages its own lookup caches.
        self._nodecache.clearcaches()
    except AttributeError:
        # Pure-python fallback: reset the node->rev dict and the
        # backward-scan position used by rev().
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
561
561
def rev(self, node):
    """Return the revision number for ``node``.

    Raises WdirUnsupported for the working-directory pseudo nodes and
    LookupError when the node is unknown.
    """
    try:
        return self._nodecache[node]
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        if node == wdirid or node in wdirfilenodeids:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.indexfile, _('no node'))
    except KeyError:
        # pure python cache lookup failed
        n = self._nodecache
        i = self.index
        p = self._nodepos
        if p is None:
            # No scan in progress: start from the last revision.
            p = len(i) - 1
        else:
            assert p < len(i)
        # Scan backwards from the last unscanned position, memoizing
        # every node encountered so future lookups hit the cache.
        for r in pycompat.xrange(p, -1, -1):
            v = i[r][7]
            n[v] = r
            if v == node:
                # Everything from r upwards is now cached; resume any
                # later scan just below r.
                self._nodepos = r - 1
                return r
        if node == wdirid or node in wdirfilenodeids:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.indexfile, _('no node'))
590
590
591 # Accessors for index entries.
591 # Accessors for index entries.
592
592
593 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
593 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
594 # are flags.
594 # are flags.
def start(self, rev):
    """Byte offset of ``rev`` in the data file.

    The first index-entry field packs the offset in its upper 48 bits
    and the flags in its lower 16 bits.
    """
    packed = self.index[rev][0]
    return int(packed >> 16)
597
597
def flags(self, rev):
    """Flag bits for ``rev`` (lower 16 bits of the packed first field)."""
    packed = self.index[rev][0]
    return packed & 0xFFFF
600
600
def length(self, rev):
    """Stored (on-disk) length of revision ``rev`` (index field 1)."""
    return self.index[rev][1]
603
603
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    cached = self.index[rev][2]
    if cached >= 0:
        return cached
    # A negative stored size means "unknown": fall back to
    # materializing the raw text and measuring it.
    return len(self.revision(rev, raw=True))
612
612
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    # Fast path: unless some content-changing "read" flag processor is
    # involved, the processed size equals the raw size. ELLIPSIS is
    # known not to change the content.
    flags = self.flags(rev)
    if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
        return self.rawsize(rev)
    # Slow path: run the processors and measure the result.
    return len(self.revision(rev, raw=False))
622
622
def chainbase(self, rev):
    """Return the revision at the base of rev's delta chain.

    Results are memoized in ``self._chainbasecache``.
    """
    cached = self._chainbasecache.get(rev)
    if cached is not None:
        return cached

    # Follow delta-base pointers (index field 3) until we find a
    # revision that is its own base.
    index = self.index
    base = rev
    while index[base][3] != base:
        base = index[base][3]

    self._chainbasecache[rev] = base
    return base
637
637
def linkrev(self, rev):
    """Return the link revision for ``rev`` (index field 4)."""
    return self.index[rev][4]
640
640
def parentrevs(self, rev):
    """Return the pair of parent revisions of ``rev``.

    Raises WdirUnsupported for the working-directory pseudo revision.
    """
    try:
        e = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    # Parent revisions live in index fields 5 and 6.
    return e[5], e[6]
650
650
# fast parentrevs(rev) where rev isn't filtered; ancestors() uses this
# after validating its start revisions, so the per-revision filtering
# check can be skipped during the traversal
_uncheckedparentrevs = parentrevs
653
def node(self, rev):
    """Return the node (binary hash, index field 7) of revision ``rev``.

    Raises WdirUnsupported for the working-directory pseudo revision.
    """
    try:
        return self.index[rev][7]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
658
661
659 # Derived from index values.
662 # Derived from index values.
660
663
def end(self, rev):
    """Byte offset in the data file just past revision ``rev``."""
    return self.start(rev) + self.length(rev)
663
666
def parents(self, node):
    """Return the two parent nodes of ``node``."""
    i = self.index
    entry = i[self.rev(node)]
    # Parent fields hold revision numbers; map them back to nodes
    # inline via the index (field 7 is the node).
    return i[entry[5]][7], i[entry[6]][7]
668
671
def chainlen(self, rev):
    """Length of the delta chain ending at ``rev``."""
    clen, _compressedlen = self._chaininfo(rev)
    return clen
671
674
def _chaininfo(self, rev):
    """Return (chain length, sum of compressed delta lengths) for rev.

    Results are memoized in ``self._chaininfocache``; a walk that
    reaches an already-cached ancestor stops early and reuses its
    totals.
    """
    chaininfocache = self._chaininfocache
    if rev in chaininfocache:
        return chaininfocache[rev]
    index = self.index
    generaldelta = self._generaldelta
    iterrev = rev
    e = index[iterrev]
    clen = 0
    compresseddeltalen = 0
    # Walk towards the chain base (a revision whose delta base, index
    # field 3, is itself).
    while iterrev != e[3]:
        clen += 1
        compresseddeltalen += e[1]
        if generaldelta:
            # generaldelta: the delta parent is recorded in the index.
            iterrev = e[3]
        else:
            # legacy layout: each revision deltas against its
            # immediate predecessor.
            iterrev -= 1
        if iterrev in chaininfocache:
            # Cached ancestor found: fold in its totals and stop.
            t = chaininfocache[iterrev]
            clen += t[0]
            compresseddeltalen += t[1]
            break
        e = index[iterrev]
    else:
        # Add text length of base since decompressing that also takes
        # work. For cache hits the length is already included.
        compresseddeltalen += e[1]
    r = (clen, compresseddeltalen)
    chaininfocache[rev] = r
    return r
702
705
def _deltachain(self, rev, stoprev=None):
    """Obtain the delta chain for a revision.

    ``stoprev`` specifies a revision to stop at. If not specified, we
    stop at the base of the chain.

    Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
    revs in ascending order and ``stopped`` is a bool indicating whether
    ``stoprev`` was hit.
    """
    # Try C implementation.
    try:
        return self.index.deltachain(rev, stoprev, self._generaldelta)
    except AttributeError:
        pass

    chain = []

    # Alias to prevent attribute lookup in tight loop.
    index = self.index
    generaldelta = self._generaldelta

    iterrev = rev
    e = index[iterrev]
    # Walk until we reach the chain base (a revision whose delta base,
    # index field 3, is itself) or the caller-supplied stop revision.
    while iterrev != e[3] and iterrev != stoprev:
        chain.append(iterrev)
        if generaldelta:
            iterrev = e[3]
        else:
            iterrev -= 1
        e = index[iterrev]

    if iterrev == stoprev:
        stopped = True
    else:
        # Reached the base: it is part of the chain too.
        chain.append(iterrev)
        stopped = False

    # The walk collected revs from newest to oldest; callers expect
    # ascending order.
    chain.reverse()
    return chain, stopped
743
746
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse topological order.
    Does not generate revs lower than stoprev.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered
    # (self.node(r) raises for revisions that are not visible here;
    # NOTE(review): presumably filtered changelogs reject filtered revs
    # via a node() override -- confirm against changelog.py)
    revs = list(revs)
    checkrev = self.node
    for r in revs:
        checkrev(r)
    # and we're sure ancestors aren't filtered as well, so the fast
    # unchecked parentrevs variant is safe for the whole traversal
    return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
                                  stoprev=stoprev, inclusive=inclusive)
752
761
def descendants(self, revs):
    """Generate descendant revisions of 'revs' (delegates to dagop)."""
    revsfn, parentsfn = self.revs, self.parentrevs
    return dagop.descendantrevs(revs, revsfn, parentsfn)
755
764
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        # Set-like view over a lazily-consumed iterable plus an eager
        # overlay of explicitly added values; avoids materializing the
        # full ancestor set up front.
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                if not r in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    # "has" is ::common, made inclusive by adding common itself.
    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        else:
            missing.add(r)
            for p in self.parentrevs(r):
                if p not in has:
                    visit.append(p)
    # Sorting by revision number yields a topological order.
    missing = list(missing)
    missing.sort()
    return has, [self.node(miss) for miss in missing]
819
828
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object.

    'common' is a list of revision numbers. If common is not supplied,
    uses nullrev.
    """
    bases = [nullrev] if common is None else common
    return ancestor.incrementalmissingancestors(self.parentrevs, bases)
833
842
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    missing = self.incrementalmissingrevs(common=common)
    return missing.missingancestors(heads)
857
866
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    # Work in revision-number space, then map the result back to nodes.
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=commonrevs)
    return [self.node(r) for r in inc.missingancestors(headrevs)]
883
892
884 def nodesbetween(self, roots=None, heads=None):
893 def nodesbetween(self, roots=None, heads=None):
885 """Return a topological path from 'roots' to 'heads'.
894 """Return a topological path from 'roots' to 'heads'.
886
895
887 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
896 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
888 topologically sorted list of all nodes N that satisfy both of
897 topologically sorted list of all nodes N that satisfy both of
889 these constraints:
898 these constraints:
890
899
891 1. N is a descendant of some node in 'roots'
900 1. N is a descendant of some node in 'roots'
892 2. N is an ancestor of some node in 'heads'
901 2. N is an ancestor of some node in 'heads'
893
902
894 Every node is considered to be both a descendant and an ancestor
903 Every node is considered to be both a descendant and an ancestor
895 of itself, so every reachable node in 'roots' and 'heads' will be
904 of itself, so every reachable node in 'roots' and 'heads' will be
896 included in 'nodes'.
905 included in 'nodes'.
897
906
898 'outroots' is the list of reachable nodes in 'roots', i.e., the
907 'outroots' is the list of reachable nodes in 'roots', i.e., the
899 subset of 'roots' that is returned in 'nodes'. Likewise,
908 subset of 'roots' that is returned in 'nodes'. Likewise,
900 'outheads' is the subset of 'heads' that is also in 'nodes'.
909 'outheads' is the subset of 'heads' that is also in 'nodes'.
901
910
902 'roots' and 'heads' are both lists of node IDs. If 'roots' is
911 'roots' and 'heads' are both lists of node IDs. If 'roots' is
903 unspecified, uses nullid as the only root. If 'heads' is
912 unspecified, uses nullid as the only root. If 'heads' is
904 unspecified, uses list of all of the revlog's heads."""
913 unspecified, uses list of all of the revlog's heads."""
905 nonodes = ([], [], [])
914 nonodes = ([], [], [])
906 if roots is not None:
915 if roots is not None:
907 roots = list(roots)
916 roots = list(roots)
908 if not roots:
917 if not roots:
909 return nonodes
918 return nonodes
910 lowestrev = min([self.rev(n) for n in roots])
919 lowestrev = min([self.rev(n) for n in roots])
911 else:
920 else:
912 roots = [nullid] # Everybody's a descendant of nullid
921 roots = [nullid] # Everybody's a descendant of nullid
913 lowestrev = nullrev
922 lowestrev = nullrev
914 if (lowestrev == nullrev) and (heads is None):
923 if (lowestrev == nullrev) and (heads is None):
915 # We want _all_ the nodes!
924 # We want _all_ the nodes!
916 return ([self.node(r) for r in self], [nullid], list(self.heads()))
925 return ([self.node(r) for r in self], [nullid], list(self.heads()))
917 if heads is None:
926 if heads is None:
918 # All nodes are ancestors, so the latest ancestor is the last
927 # All nodes are ancestors, so the latest ancestor is the last
919 # node.
928 # node.
920 highestrev = len(self) - 1
929 highestrev = len(self) - 1
921 # Set ancestors to None to signal that every node is an ancestor.
930 # Set ancestors to None to signal that every node is an ancestor.
922 ancestors = None
931 ancestors = None
923 # Set heads to an empty dictionary for later discovery of heads
932 # Set heads to an empty dictionary for later discovery of heads
924 heads = {}
933 heads = {}
925 else:
934 else:
926 heads = list(heads)
935 heads = list(heads)
927 if not heads:
936 if not heads:
928 return nonodes
937 return nonodes
929 ancestors = set()
938 ancestors = set()
930 # Turn heads into a dictionary so we can remove 'fake' heads.
939 # Turn heads into a dictionary so we can remove 'fake' heads.
931 # Also, later we will be using it to filter out the heads we can't
940 # Also, later we will be using it to filter out the heads we can't
932 # find from roots.
941 # find from roots.
933 heads = dict.fromkeys(heads, False)
942 heads = dict.fromkeys(heads, False)
934 # Start at the top and keep marking parents until we're done.
943 # Start at the top and keep marking parents until we're done.
935 nodestotag = set(heads)
944 nodestotag = set(heads)
936 # Remember where the top was so we can use it as a limit later.
945 # Remember where the top was so we can use it as a limit later.
937 highestrev = max([self.rev(n) for n in nodestotag])
946 highestrev = max([self.rev(n) for n in nodestotag])
938 while nodestotag:
947 while nodestotag:
939 # grab a node to tag
948 # grab a node to tag
940 n = nodestotag.pop()
949 n = nodestotag.pop()
941 # Never tag nullid
950 # Never tag nullid
942 if n == nullid:
951 if n == nullid:
943 continue
952 continue
944 # A node's revision number represents its place in a
953 # A node's revision number represents its place in a
945 # topologically sorted list of nodes.
954 # topologically sorted list of nodes.
946 r = self.rev(n)
955 r = self.rev(n)
947 if r >= lowestrev:
956 if r >= lowestrev:
948 if n not in ancestors:
957 if n not in ancestors:
949 # If we are possibly a descendant of one of the roots
958 # If we are possibly a descendant of one of the roots
950 # and we haven't already been marked as an ancestor
959 # and we haven't already been marked as an ancestor
951 ancestors.add(n) # Mark as ancestor
960 ancestors.add(n) # Mark as ancestor
952 # Add non-nullid parents to list of nodes to tag.
961 # Add non-nullid parents to list of nodes to tag.
953 nodestotag.update([p for p in self.parents(n) if
962 nodestotag.update([p for p in self.parents(n) if
954 p != nullid])
963 p != nullid])
955 elif n in heads: # We've seen it before, is it a fake head?
964 elif n in heads: # We've seen it before, is it a fake head?
956 # So it is, real heads should not be the ancestors of
965 # So it is, real heads should not be the ancestors of
957 # any other heads.
966 # any other heads.
958 heads.pop(n)
967 heads.pop(n)
959 if not ancestors:
968 if not ancestors:
960 return nonodes
969 return nonodes
961 # Now that we have our set of ancestors, we want to remove any
970 # Now that we have our set of ancestors, we want to remove any
962 # roots that are not ancestors.
971 # roots that are not ancestors.
963
972
964 # If one of the roots was nullid, everything is included anyway.
973 # If one of the roots was nullid, everything is included anyway.
965 if lowestrev > nullrev:
974 if lowestrev > nullrev:
966 # But, since we weren't, let's recompute the lowest rev to not
975 # But, since we weren't, let's recompute the lowest rev to not
967 # include roots that aren't ancestors.
976 # include roots that aren't ancestors.
968
977
969 # Filter out roots that aren't ancestors of heads
978 # Filter out roots that aren't ancestors of heads
970 roots = [root for root in roots if root in ancestors]
979 roots = [root for root in roots if root in ancestors]
971 # Recompute the lowest revision
980 # Recompute the lowest revision
972 if roots:
981 if roots:
973 lowestrev = min([self.rev(root) for root in roots])
982 lowestrev = min([self.rev(root) for root in roots])
974 else:
983 else:
975 # No more roots? Return empty list
984 # No more roots? Return empty list
976 return nonodes
985 return nonodes
977 else:
986 else:
978 # We are descending from nullid, and don't need to care about
987 # We are descending from nullid, and don't need to care about
979 # any other roots.
988 # any other roots.
980 lowestrev = nullrev
989 lowestrev = nullrev
981 roots = [nullid]
990 roots = [nullid]
982 # Transform our roots list into a set.
991 # Transform our roots list into a set.
983 descendants = set(roots)
992 descendants = set(roots)
984 # Also, keep the original roots so we can filter out roots that aren't
993 # Also, keep the original roots so we can filter out roots that aren't
985 # 'real' roots (i.e. are descended from other roots).
994 # 'real' roots (i.e. are descended from other roots).
986 roots = descendants.copy()
995 roots = descendants.copy()
987 # Our topologically sorted list of output nodes.
996 # Our topologically sorted list of output nodes.
988 orderedout = []
997 orderedout = []
989 # Don't start at nullid since we don't want nullid in our output list,
998 # Don't start at nullid since we don't want nullid in our output list,
990 # and if nullid shows up in descendants, empty parents will look like
999 # and if nullid shows up in descendants, empty parents will look like
991 # they're descendants.
1000 # they're descendants.
992 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1001 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
993 n = self.node(r)
1002 n = self.node(r)
994 isdescendant = False
1003 isdescendant = False
995 if lowestrev == nullrev: # Everybody is a descendant of nullid
1004 if lowestrev == nullrev: # Everybody is a descendant of nullid
996 isdescendant = True
1005 isdescendant = True
997 elif n in descendants:
1006 elif n in descendants:
998 # n is already a descendant
1007 # n is already a descendant
999 isdescendant = True
1008 isdescendant = True
1000 # This check only needs to be done here because all the roots
1009 # This check only needs to be done here because all the roots
1001 # will start being marked is descendants before the loop.
1010 # will start being marked is descendants before the loop.
1002 if n in roots:
1011 if n in roots:
1003 # If n was a root, check if it's a 'real' root.
1012 # If n was a root, check if it's a 'real' root.
1004 p = tuple(self.parents(n))
1013 p = tuple(self.parents(n))
1005 # If any of its parents are descendants, it's not a root.
1014 # If any of its parents are descendants, it's not a root.
1006 if (p[0] in descendants) or (p[1] in descendants):
1015 if (p[0] in descendants) or (p[1] in descendants):
1007 roots.remove(n)
1016 roots.remove(n)
1008 else:
1017 else:
1009 p = tuple(self.parents(n))
1018 p = tuple(self.parents(n))
1010 # A node is a descendant if either of its parents are
1019 # A node is a descendant if either of its parents are
1011 # descendants. (We seeded the dependents list with the roots
1020 # descendants. (We seeded the dependents list with the roots
1012 # up there, remember?)
1021 # up there, remember?)
1013 if (p[0] in descendants) or (p[1] in descendants):
1022 if (p[0] in descendants) or (p[1] in descendants):
1014 descendants.add(n)
1023 descendants.add(n)
1015 isdescendant = True
1024 isdescendant = True
1016 if isdescendant and ((ancestors is None) or (n in ancestors)):
1025 if isdescendant and ((ancestors is None) or (n in ancestors)):
1017 # Only include nodes that are both descendants and ancestors.
1026 # Only include nodes that are both descendants and ancestors.
1018 orderedout.append(n)
1027 orderedout.append(n)
1019 if (ancestors is not None) and (n in heads):
1028 if (ancestors is not None) and (n in heads):
1020 # We're trying to figure out which heads are reachable
1029 # We're trying to figure out which heads are reachable
1021 # from roots.
1030 # from roots.
1022 # Mark this head as having been reached
1031 # Mark this head as having been reached
1023 heads[n] = True
1032 heads[n] = True
1024 elif ancestors is None:
1033 elif ancestors is None:
1025 # Otherwise, we're trying to discover the heads.
1034 # Otherwise, we're trying to discover the heads.
1026 # Assume this is a head because if it isn't, the next step
1035 # Assume this is a head because if it isn't, the next step
1027 # will eventually remove it.
1036 # will eventually remove it.
1028 heads[n] = True
1037 heads[n] = True
1029 # But, obviously its parents aren't.
1038 # But, obviously its parents aren't.
1030 for p in self.parents(n):
1039 for p in self.parents(n):
1031 heads.pop(p, None)
1040 heads.pop(p, None)
1032 heads = [head for head, flag in heads.iteritems() if flag]
1041 heads = [head for head, flag in heads.iteritems() if flag]
1033 roots = list(roots)
1042 roots = list(roots)
1034 assert orderedout
1043 assert orderedout
1035 assert roots
1044 assert roots
1036 assert heads
1045 assert heads
1037 return (orderedout, roots, heads)
1046 return (orderedout, roots, heads)
1038
1047
1039 def headrevs(self):
1048 def headrevs(self):
1040 try:
1049 try:
1041 return self.index.headrevs()
1050 return self.index.headrevs()
1042 except AttributeError:
1051 except AttributeError:
1043 return self._headrevs()
1052 return self._headrevs()
1044
1053
1045 def computephases(self, roots):
1054 def computephases(self, roots):
1046 return self.index.computephasesmapsets(roots)
1055 return self.index.computephasesmapsets(roots)
1047
1056
1048 def _headrevs(self):
1057 def _headrevs(self):
1049 count = len(self)
1058 count = len(self)
1050 if not count:
1059 if not count:
1051 return [nullrev]
1060 return [nullrev]
1052 # we won't iter over filtered rev so nobody is a head at start
1061 # we won't iter over filtered rev so nobody is a head at start
1053 ishead = [0] * (count + 1)
1062 ishead = [0] * (count + 1)
1054 index = self.index
1063 index = self.index
1055 for r in self:
1064 for r in self:
1056 ishead[r] = 1 # I may be an head
1065 ishead[r] = 1 # I may be an head
1057 e = index[r]
1066 e = index[r]
1058 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1067 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1059 return [r for r, val in enumerate(ishead) if val]
1068 return [r for r, val in enumerate(ishead) if val]
1060
1069
1061 def heads(self, start=None, stop=None):
1070 def heads(self, start=None, stop=None):
1062 """return the list of all nodes that have no children
1071 """return the list of all nodes that have no children
1063
1072
1064 if start is specified, only heads that are descendants of
1073 if start is specified, only heads that are descendants of
1065 start will be returned
1074 start will be returned
1066 if stop is specified, it will consider all the revs from stop
1075 if stop is specified, it will consider all the revs from stop
1067 as if they had no children
1076 as if they had no children
1068 """
1077 """
1069 if start is None and stop is None:
1078 if start is None and stop is None:
1070 if not len(self):
1079 if not len(self):
1071 return [nullid]
1080 return [nullid]
1072 return [self.node(r) for r in self.headrevs()]
1081 return [self.node(r) for r in self.headrevs()]
1073
1082
1074 if start is None:
1083 if start is None:
1075 start = nullrev
1084 start = nullrev
1076 else:
1085 else:
1077 start = self.rev(start)
1086 start = self.rev(start)
1078
1087
1079 stoprevs = set(self.rev(n) for n in stop or [])
1088 stoprevs = set(self.rev(n) for n in stop or [])
1080
1089
1081 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1090 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1082 stoprevs=stoprevs)
1091 stoprevs=stoprevs)
1083
1092
1084 return [self.node(rev) for rev in revs]
1093 return [self.node(rev) for rev in revs]
1085
1094
1086 def children(self, node):
1095 def children(self, node):
1087 """find the children of a given node"""
1096 """find the children of a given node"""
1088 c = []
1097 c = []
1089 p = self.rev(node)
1098 p = self.rev(node)
1090 for r in self.revs(start=p + 1):
1099 for r in self.revs(start=p + 1):
1091 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1100 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1092 if prevs:
1101 if prevs:
1093 for pr in prevs:
1102 for pr in prevs:
1094 if pr == p:
1103 if pr == p:
1095 c.append(self.node(r))
1104 c.append(self.node(r))
1096 elif p == nullrev:
1105 elif p == nullrev:
1097 c.append(self.node(r))
1106 c.append(self.node(r))
1098 return c
1107 return c
1099
1108
1100 def commonancestorsheads(self, a, b):
1109 def commonancestorsheads(self, a, b):
1101 """calculate all the heads of the common ancestors of nodes a and b"""
1110 """calculate all the heads of the common ancestors of nodes a and b"""
1102 a, b = self.rev(a), self.rev(b)
1111 a, b = self.rev(a), self.rev(b)
1103 ancs = self._commonancestorsheads(a, b)
1112 ancs = self._commonancestorsheads(a, b)
1104 return pycompat.maplist(self.node, ancs)
1113 return pycompat.maplist(self.node, ancs)
1105
1114
1106 def _commonancestorsheads(self, *revs):
1115 def _commonancestorsheads(self, *revs):
1107 """calculate all the heads of the common ancestors of revs"""
1116 """calculate all the heads of the common ancestors of revs"""
1108 try:
1117 try:
1109 ancs = self.index.commonancestorsheads(*revs)
1118 ancs = self.index.commonancestorsheads(*revs)
1110 except (AttributeError, OverflowError): # C implementation failed
1119 except (AttributeError, OverflowError): # C implementation failed
1111 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1120 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1112 return ancs
1121 return ancs
1113
1122
1114 def isancestor(self, a, b):
1123 def isancestor(self, a, b):
1115 """return True if node a is an ancestor of node b
1124 """return True if node a is an ancestor of node b
1116
1125
1117 A revision is considered an ancestor of itself."""
1126 A revision is considered an ancestor of itself."""
1118 a, b = self.rev(a), self.rev(b)
1127 a, b = self.rev(a), self.rev(b)
1119 return self.isancestorrev(a, b)
1128 return self.isancestorrev(a, b)
1120
1129
1121 def isancestorrev(self, a, b):
1130 def isancestorrev(self, a, b):
1122 """return True if revision a is an ancestor of revision b
1131 """return True if revision a is an ancestor of revision b
1123
1132
1124 A revision is considered an ancestor of itself.
1133 A revision is considered an ancestor of itself.
1125
1134
1126 The implementation of this is trivial but the use of
1135 The implementation of this is trivial but the use of
1127 commonancestorsheads is not."""
1136 commonancestorsheads is not."""
1128 if a == nullrev:
1137 if a == nullrev:
1129 return True
1138 return True
1130 elif a == b:
1139 elif a == b:
1131 return True
1140 return True
1132 elif a > b:
1141 elif a > b:
1133 return False
1142 return False
1134 return a in self._commonancestorsheads(a, b)
1143 return a in self._commonancestorsheads(a, b)
1135
1144
1136 def ancestor(self, a, b):
1145 def ancestor(self, a, b):
1137 """calculate the "best" common ancestor of nodes a and b"""
1146 """calculate the "best" common ancestor of nodes a and b"""
1138
1147
1139 a, b = self.rev(a), self.rev(b)
1148 a, b = self.rev(a), self.rev(b)
1140 try:
1149 try:
1141 ancs = self.index.ancestors(a, b)
1150 ancs = self.index.ancestors(a, b)
1142 except (AttributeError, OverflowError):
1151 except (AttributeError, OverflowError):
1143 ancs = ancestor.ancestors(self.parentrevs, a, b)
1152 ancs = ancestor.ancestors(self.parentrevs, a, b)
1144 if ancs:
1153 if ancs:
1145 # choose a consistent winner when there's a tie
1154 # choose a consistent winner when there's a tie
1146 return min(map(self.node, ancs))
1155 return min(map(self.node, ancs))
1147 return nullid
1156 return nullid
1148
1157
1149 def _match(self, id):
1158 def _match(self, id):
1150 if isinstance(id, int):
1159 if isinstance(id, int):
1151 # rev
1160 # rev
1152 return self.node(id)
1161 return self.node(id)
1153 if len(id) == 20:
1162 if len(id) == 20:
1154 # possibly a binary node
1163 # possibly a binary node
1155 # odds of a binary node being all hex in ASCII are 1 in 10**25
1164 # odds of a binary node being all hex in ASCII are 1 in 10**25
1156 try:
1165 try:
1157 node = id
1166 node = id
1158 self.rev(node) # quick search the index
1167 self.rev(node) # quick search the index
1159 return node
1168 return node
1160 except error.LookupError:
1169 except error.LookupError:
1161 pass # may be partial hex id
1170 pass # may be partial hex id
1162 try:
1171 try:
1163 # str(rev)
1172 # str(rev)
1164 rev = int(id)
1173 rev = int(id)
1165 if "%d" % rev != id:
1174 if "%d" % rev != id:
1166 raise ValueError
1175 raise ValueError
1167 if rev < 0:
1176 if rev < 0:
1168 rev = len(self) + rev
1177 rev = len(self) + rev
1169 if rev < 0 or rev >= len(self):
1178 if rev < 0 or rev >= len(self):
1170 raise ValueError
1179 raise ValueError
1171 return self.node(rev)
1180 return self.node(rev)
1172 except (ValueError, OverflowError):
1181 except (ValueError, OverflowError):
1173 pass
1182 pass
1174 if len(id) == 40:
1183 if len(id) == 40:
1175 try:
1184 try:
1176 # a full hex nodeid?
1185 # a full hex nodeid?
1177 node = bin(id)
1186 node = bin(id)
1178 self.rev(node)
1187 self.rev(node)
1179 return node
1188 return node
1180 except (TypeError, error.LookupError):
1189 except (TypeError, error.LookupError):
1181 pass
1190 pass
1182
1191
1183 def _partialmatch(self, id):
1192 def _partialmatch(self, id):
1184 # we don't care wdirfilenodeids as they should be always full hash
1193 # we don't care wdirfilenodeids as they should be always full hash
1185 maybewdir = wdirhex.startswith(id)
1194 maybewdir = wdirhex.startswith(id)
1186 try:
1195 try:
1187 partial = self.index.partialmatch(id)
1196 partial = self.index.partialmatch(id)
1188 if partial and self.hasnode(partial):
1197 if partial and self.hasnode(partial):
1189 if maybewdir:
1198 if maybewdir:
1190 # single 'ff...' match in radix tree, ambiguous with wdir
1199 # single 'ff...' match in radix tree, ambiguous with wdir
1191 raise error.RevlogError
1200 raise error.RevlogError
1192 return partial
1201 return partial
1193 if maybewdir:
1202 if maybewdir:
1194 # no 'ff...' match in radix tree, wdir identified
1203 # no 'ff...' match in radix tree, wdir identified
1195 raise error.WdirUnsupported
1204 raise error.WdirUnsupported
1196 return None
1205 return None
1197 except error.RevlogError:
1206 except error.RevlogError:
1198 # parsers.c radix tree lookup gave multiple matches
1207 # parsers.c radix tree lookup gave multiple matches
1199 # fast path: for unfiltered changelog, radix tree is accurate
1208 # fast path: for unfiltered changelog, radix tree is accurate
1200 if not getattr(self, 'filteredrevs', None):
1209 if not getattr(self, 'filteredrevs', None):
1201 raise error.AmbiguousPrefixLookupError(
1210 raise error.AmbiguousPrefixLookupError(
1202 id, self.indexfile, _('ambiguous identifier'))
1211 id, self.indexfile, _('ambiguous identifier'))
1203 # fall through to slow path that filters hidden revisions
1212 # fall through to slow path that filters hidden revisions
1204 except (AttributeError, ValueError):
1213 except (AttributeError, ValueError):
1205 # we are pure python, or key was too short to search radix tree
1214 # we are pure python, or key was too short to search radix tree
1206 pass
1215 pass
1207
1216
1208 if id in self._pcache:
1217 if id in self._pcache:
1209 return self._pcache[id]
1218 return self._pcache[id]
1210
1219
1211 if len(id) <= 40:
1220 if len(id) <= 40:
1212 try:
1221 try:
1213 # hex(node)[:...]
1222 # hex(node)[:...]
1214 l = len(id) // 2 # grab an even number of digits
1223 l = len(id) // 2 # grab an even number of digits
1215 prefix = bin(id[:l * 2])
1224 prefix = bin(id[:l * 2])
1216 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1225 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1217 nl = [n for n in nl if hex(n).startswith(id) and
1226 nl = [n for n in nl if hex(n).startswith(id) and
1218 self.hasnode(n)]
1227 self.hasnode(n)]
1219 if nullhex.startswith(id):
1228 if nullhex.startswith(id):
1220 nl.append(nullid)
1229 nl.append(nullid)
1221 if len(nl) > 0:
1230 if len(nl) > 0:
1222 if len(nl) == 1 and not maybewdir:
1231 if len(nl) == 1 and not maybewdir:
1223 self._pcache[id] = nl[0]
1232 self._pcache[id] = nl[0]
1224 return nl[0]
1233 return nl[0]
1225 raise error.AmbiguousPrefixLookupError(
1234 raise error.AmbiguousPrefixLookupError(
1226 id, self.indexfile, _('ambiguous identifier'))
1235 id, self.indexfile, _('ambiguous identifier'))
1227 if maybewdir:
1236 if maybewdir:
1228 raise error.WdirUnsupported
1237 raise error.WdirUnsupported
1229 return None
1238 return None
1230 except TypeError:
1239 except TypeError:
1231 pass
1240 pass
1232
1241
1233 def lookup(self, id):
1242 def lookup(self, id):
1234 """locate a node based on:
1243 """locate a node based on:
1235 - revision number or str(revision number)
1244 - revision number or str(revision number)
1236 - nodeid or subset of hex nodeid
1245 - nodeid or subset of hex nodeid
1237 """
1246 """
1238 n = self._match(id)
1247 n = self._match(id)
1239 if n is not None:
1248 if n is not None:
1240 return n
1249 return n
1241 n = self._partialmatch(id)
1250 n = self._partialmatch(id)
1242 if n:
1251 if n:
1243 return n
1252 return n
1244
1253
1245 raise error.LookupError(id, self.indexfile, _('no match found'))
1254 raise error.LookupError(id, self.indexfile, _('no match found'))
1246
1255
1247 def shortest(self, node, minlength=1):
1256 def shortest(self, node, minlength=1):
1248 """Find the shortest unambiguous prefix that matches node."""
1257 """Find the shortest unambiguous prefix that matches node."""
1249 def isvalid(prefix):
1258 def isvalid(prefix):
1250 try:
1259 try:
1251 node = self._partialmatch(prefix)
1260 node = self._partialmatch(prefix)
1252 except error.AmbiguousPrefixLookupError:
1261 except error.AmbiguousPrefixLookupError:
1253 return False
1262 return False
1254 except error.WdirUnsupported:
1263 except error.WdirUnsupported:
1255 # single 'ff...' match
1264 # single 'ff...' match
1256 return True
1265 return True
1257 if node is None:
1266 if node is None:
1258 raise error.LookupError(node, self.indexfile, _('no node'))
1267 raise error.LookupError(node, self.indexfile, _('no node'))
1259 return True
1268 return True
1260
1269
1261 def maybewdir(prefix):
1270 def maybewdir(prefix):
1262 return all(c == 'f' for c in prefix)
1271 return all(c == 'f' for c in prefix)
1263
1272
1264 hexnode = hex(node)
1273 hexnode = hex(node)
1265
1274
1266 def disambiguate(hexnode, minlength):
1275 def disambiguate(hexnode, minlength):
1267 """Disambiguate against wdirid."""
1276 """Disambiguate against wdirid."""
1268 for length in range(minlength, 41):
1277 for length in range(minlength, 41):
1269 prefix = hexnode[:length]
1278 prefix = hexnode[:length]
1270 if not maybewdir(prefix):
1279 if not maybewdir(prefix):
1271 return prefix
1280 return prefix
1272
1281
1273 if not getattr(self, 'filteredrevs', None):
1282 if not getattr(self, 'filteredrevs', None):
1274 try:
1283 try:
1275 length = max(self.index.shortest(node), minlength)
1284 length = max(self.index.shortest(node), minlength)
1276 return disambiguate(hexnode, length)
1285 return disambiguate(hexnode, length)
1277 except error.RevlogError:
1286 except error.RevlogError:
1278 if node != wdirid:
1287 if node != wdirid:
1279 raise error.LookupError(node, self.indexfile, _('no node'))
1288 raise error.LookupError(node, self.indexfile, _('no node'))
1280 except AttributeError:
1289 except AttributeError:
1281 # Fall through to pure code
1290 # Fall through to pure code
1282 pass
1291 pass
1283
1292
1284 if node == wdirid:
1293 if node == wdirid:
1285 for length in range(minlength, 41):
1294 for length in range(minlength, 41):
1286 prefix = hexnode[:length]
1295 prefix = hexnode[:length]
1287 if isvalid(prefix):
1296 if isvalid(prefix):
1288 return prefix
1297 return prefix
1289
1298
1290 for length in range(minlength, 41):
1299 for length in range(minlength, 41):
1291 prefix = hexnode[:length]
1300 prefix = hexnode[:length]
1292 if isvalid(prefix):
1301 if isvalid(prefix):
1293 return disambiguate(hexnode, length)
1302 return disambiguate(hexnode, length)
1294
1303
1295 def cmp(self, node, text):
1304 def cmp(self, node, text):
1296 """compare text with a given file revision
1305 """compare text with a given file revision
1297
1306
1298 returns True if text is different than what is stored.
1307 returns True if text is different than what is stored.
1299 """
1308 """
1300 p1, p2 = self.parents(node)
1309 p1, p2 = self.parents(node)
1301 return storageutil.hashrevisionsha1(text, p1, p2) != node
1310 return storageutil.hashrevisionsha1(text, p1, p2) != node
1302
1311
1303 def _cachesegment(self, offset, data):
1312 def _cachesegment(self, offset, data):
1304 """Add a segment to the revlog cache.
1313 """Add a segment to the revlog cache.
1305
1314
1306 Accepts an absolute offset and the data that is at that location.
1315 Accepts an absolute offset and the data that is at that location.
1307 """
1316 """
1308 o, d = self._chunkcache
1317 o, d = self._chunkcache
1309 # try to add to existing cache
1318 # try to add to existing cache
1310 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1319 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1311 self._chunkcache = o, d + data
1320 self._chunkcache = o, d + data
1312 else:
1321 else:
1313 self._chunkcache = offset, data
1322 self._chunkcache = offset, data
1314
1323
1315 def _readsegment(self, offset, length, df=None):
1324 def _readsegment(self, offset, length, df=None):
1316 """Load a segment of raw data from the revlog.
1325 """Load a segment of raw data from the revlog.
1317
1326
1318 Accepts an absolute offset, length to read, and an optional existing
1327 Accepts an absolute offset, length to read, and an optional existing
1319 file handle to read from.
1328 file handle to read from.
1320
1329
1321 If an existing file handle is passed, it will be seeked and the
1330 If an existing file handle is passed, it will be seeked and the
1322 original seek position will NOT be restored.
1331 original seek position will NOT be restored.
1323
1332
1324 Returns a str or buffer of raw byte data.
1333 Returns a str or buffer of raw byte data.
1325 """
1334 """
1326 # Cache data both forward and backward around the requested
1335 # Cache data both forward and backward around the requested
1327 # data, in a fixed size window. This helps speed up operations
1336 # data, in a fixed size window. This helps speed up operations
1328 # involving reading the revlog backwards.
1337 # involving reading the revlog backwards.
1329 cachesize = self._chunkcachesize
1338 cachesize = self._chunkcachesize
1330 realoffset = offset & ~(cachesize - 1)
1339 realoffset = offset & ~(cachesize - 1)
1331 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1340 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1332 - realoffset)
1341 - realoffset)
1333 with self._datareadfp(df) as df:
1342 with self._datareadfp(df) as df:
1334 df.seek(realoffset)
1343 df.seek(realoffset)
1335 d = df.read(reallength)
1344 d = df.read(reallength)
1336 self._cachesegment(realoffset, d)
1345 self._cachesegment(realoffset, d)
1337 if offset != realoffset or reallength != length:
1346 if offset != realoffset or reallength != length:
1338 return util.buffer(d, offset - realoffset, length)
1347 return util.buffer(d, offset - realoffset, length)
1339 return d
1348 return d
1340
1349
1341 def _getsegment(self, offset, length, df=None):
1350 def _getsegment(self, offset, length, df=None):
1342 """Obtain a segment of raw data from the revlog.
1351 """Obtain a segment of raw data from the revlog.
1343
1352
1344 Accepts an absolute offset, length of bytes to obtain, and an
1353 Accepts an absolute offset, length of bytes to obtain, and an
1345 optional file handle to the already-opened revlog. If the file
1354 optional file handle to the already-opened revlog. If the file
1346 handle is used, it's original seek position will not be preserved.
1355 handle is used, it's original seek position will not be preserved.
1347
1356
1348 Requests for data may be returned from a cache.
1357 Requests for data may be returned from a cache.
1349
1358
1350 Returns a str or a buffer instance of raw byte data.
1359 Returns a str or a buffer instance of raw byte data.
1351 """
1360 """
1352 o, d = self._chunkcache
1361 o, d = self._chunkcache
1353 l = len(d)
1362 l = len(d)
1354
1363
1355 # is it in the cache?
1364 # is it in the cache?
1356 cachestart = offset - o
1365 cachestart = offset - o
1357 cacheend = cachestart + length
1366 cacheend = cachestart + length
1358 if cachestart >= 0 and cacheend <= l:
1367 if cachestart >= 0 and cacheend <= l:
1359 if cachestart == 0 and cacheend == l:
1368 if cachestart == 0 and cacheend == l:
1360 return d # avoid a copy
1369 return d # avoid a copy
1361 return util.buffer(d, cachestart, cacheend - cachestart)
1370 return util.buffer(d, cachestart, cacheend - cachestart)
1362
1371
1363 return self._readsegment(offset, length, df=df)
1372 return self._readsegment(offset, length, df=df)
1364
1373
1365 def _getsegmentforrevs(self, startrev, endrev, df=None):
1374 def _getsegmentforrevs(self, startrev, endrev, df=None):
1366 """Obtain a segment of raw data corresponding to a range of revisions.
1375 """Obtain a segment of raw data corresponding to a range of revisions.
1367
1376
1368 Accepts the start and end revisions and an optional already-open
1377 Accepts the start and end revisions and an optional already-open
1369 file handle to be used for reading. If the file handle is read, its
1378 file handle to be used for reading. If the file handle is read, its
1370 seek position will not be preserved.
1379 seek position will not be preserved.
1371
1380
1372 Requests for data may be satisfied by a cache.
1381 Requests for data may be satisfied by a cache.
1373
1382
1374 Returns a 2-tuple of (offset, data) for the requested range of
1383 Returns a 2-tuple of (offset, data) for the requested range of
1375 revisions. Offset is the integer offset from the beginning of the
1384 revisions. Offset is the integer offset from the beginning of the
1376 revlog and data is a str or buffer of the raw byte data.
1385 revlog and data is a str or buffer of the raw byte data.
1377
1386
1378 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1387 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1379 to determine where each revision's data begins and ends.
1388 to determine where each revision's data begins and ends.
1380 """
1389 """
1381 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1390 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1382 # (functions are expensive).
1391 # (functions are expensive).
1383 index = self.index
1392 index = self.index
1384 istart = index[startrev]
1393 istart = index[startrev]
1385 start = int(istart[0] >> 16)
1394 start = int(istart[0] >> 16)
1386 if startrev == endrev:
1395 if startrev == endrev:
1387 end = start + istart[1]
1396 end = start + istart[1]
1388 else:
1397 else:
1389 iend = index[endrev]
1398 iend = index[endrev]
1390 end = int(iend[0] >> 16) + iend[1]
1399 end = int(iend[0] >> 16) + iend[1]
1391
1400
1392 if self._inline:
1401 if self._inline:
1393 start += (startrev + 1) * self._io.size
1402 start += (startrev + 1) * self._io.size
1394 end += (endrev + 1) * self._io.size
1403 end += (endrev + 1) * self._io.size
1395 length = end - start
1404 length = end - start
1396
1405
1397 return start, self._getsegment(start, length, df=df)
1406 return start, self._getsegment(start, length, df=df)
1398
1407
1399 def _chunk(self, rev, df=None):
1408 def _chunk(self, rev, df=None):
1400 """Obtain a single decompressed chunk for a revision.
1409 """Obtain a single decompressed chunk for a revision.
1401
1410
1402 Accepts an integer revision and an optional already-open file handle
1411 Accepts an integer revision and an optional already-open file handle
1403 to be used for reading. If used, the seek position of the file will not
1412 to be used for reading. If used, the seek position of the file will not
1404 be preserved.
1413 be preserved.
1405
1414
1406 Returns a str holding uncompressed data for the requested revision.
1415 Returns a str holding uncompressed data for the requested revision.
1407 """
1416 """
1408 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1417 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1409
1418
1410 def _chunks(self, revs, df=None, targetsize=None):
1419 def _chunks(self, revs, df=None, targetsize=None):
1411 """Obtain decompressed chunks for the specified revisions.
1420 """Obtain decompressed chunks for the specified revisions.
1412
1421
1413 Accepts an iterable of numeric revisions that are assumed to be in
1422 Accepts an iterable of numeric revisions that are assumed to be in
1414 ascending order. Also accepts an optional already-open file handle
1423 ascending order. Also accepts an optional already-open file handle
1415 to be used for reading. If used, the seek position of the file will
1424 to be used for reading. If used, the seek position of the file will
1416 not be preserved.
1425 not be preserved.
1417
1426
1418 This function is similar to calling ``self._chunk()`` multiple times,
1427 This function is similar to calling ``self._chunk()`` multiple times,
1419 but is faster.
1428 but is faster.
1420
1429
1421 Returns a list with decompressed data for each requested revision.
1430 Returns a list with decompressed data for each requested revision.
1422 """
1431 """
1423 if not revs:
1432 if not revs:
1424 return []
1433 return []
1425 start = self.start
1434 start = self.start
1426 length = self.length
1435 length = self.length
1427 inline = self._inline
1436 inline = self._inline
1428 iosize = self._io.size
1437 iosize = self._io.size
1429 buffer = util.buffer
1438 buffer = util.buffer
1430
1439
1431 l = []
1440 l = []
1432 ladd = l.append
1441 ladd = l.append
1433
1442
1434 if not self._withsparseread:
1443 if not self._withsparseread:
1435 slicedchunks = (revs,)
1444 slicedchunks = (revs,)
1436 else:
1445 else:
1437 slicedchunks = deltautil.slicechunk(self, revs,
1446 slicedchunks = deltautil.slicechunk(self, revs,
1438 targetsize=targetsize)
1447 targetsize=targetsize)
1439
1448
1440 for revschunk in slicedchunks:
1449 for revschunk in slicedchunks:
1441 firstrev = revschunk[0]
1450 firstrev = revschunk[0]
1442 # Skip trailing revisions with empty diff
1451 # Skip trailing revisions with empty diff
1443 for lastrev in revschunk[::-1]:
1452 for lastrev in revschunk[::-1]:
1444 if length(lastrev) != 0:
1453 if length(lastrev) != 0:
1445 break
1454 break
1446
1455
1447 try:
1456 try:
1448 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1457 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1449 except OverflowError:
1458 except OverflowError:
1450 # issue4215 - we can't cache a run of chunks greater than
1459 # issue4215 - we can't cache a run of chunks greater than
1451 # 2G on Windows
1460 # 2G on Windows
1452 return [self._chunk(rev, df=df) for rev in revschunk]
1461 return [self._chunk(rev, df=df) for rev in revschunk]
1453
1462
1454 decomp = self.decompress
1463 decomp = self.decompress
1455 for rev in revschunk:
1464 for rev in revschunk:
1456 chunkstart = start(rev)
1465 chunkstart = start(rev)
1457 if inline:
1466 if inline:
1458 chunkstart += (rev + 1) * iosize
1467 chunkstart += (rev + 1) * iosize
1459 chunklength = length(rev)
1468 chunklength = length(rev)
1460 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1469 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1461
1470
1462 return l
1471 return l
1463
1472
1464 def _chunkclear(self):
1473 def _chunkclear(self):
1465 """Clear the raw chunk cache."""
1474 """Clear the raw chunk cache."""
1466 self._chunkcache = (0, '')
1475 self._chunkcache = (0, '')
1467
1476
1468 def deltaparent(self, rev):
1477 def deltaparent(self, rev):
1469 """return deltaparent of the given revision"""
1478 """return deltaparent of the given revision"""
1470 base = self.index[rev][3]
1479 base = self.index[rev][3]
1471 if base == rev:
1480 if base == rev:
1472 return nullrev
1481 return nullrev
1473 elif self._generaldelta:
1482 elif self._generaldelta:
1474 return base
1483 return base
1475 else:
1484 else:
1476 return rev - 1
1485 return rev - 1
1477
1486
1478 def issnapshot(self, rev):
1487 def issnapshot(self, rev):
1479 """tells whether rev is a snapshot
1488 """tells whether rev is a snapshot
1480 """
1489 """
1481 if rev == nullrev:
1490 if rev == nullrev:
1482 return True
1491 return True
1483 deltap = self.deltaparent(rev)
1492 deltap = self.deltaparent(rev)
1484 if deltap == nullrev:
1493 if deltap == nullrev:
1485 return True
1494 return True
1486 p1, p2 = self.parentrevs(rev)
1495 p1, p2 = self.parentrevs(rev)
1487 if deltap in (p1, p2):
1496 if deltap in (p1, p2):
1488 return False
1497 return False
1489 return self.issnapshot(deltap)
1498 return self.issnapshot(deltap)
1490
1499
1491 def snapshotdepth(self, rev):
1500 def snapshotdepth(self, rev):
1492 """number of snapshot in the chain before this one"""
1501 """number of snapshot in the chain before this one"""
1493 if not self.issnapshot(rev):
1502 if not self.issnapshot(rev):
1494 raise error.ProgrammingError('revision %d not a snapshot')
1503 raise error.ProgrammingError('revision %d not a snapshot')
1495 return len(self._deltachain(rev)[0]) - 1
1504 return len(self._deltachain(rev)[0]) - 1
1496
1505
1497 def revdiff(self, rev1, rev2):
1506 def revdiff(self, rev1, rev2):
1498 """return or calculate a delta between two revisions
1507 """return or calculate a delta between two revisions
1499
1508
1500 The delta calculated is in binary form and is intended to be written to
1509 The delta calculated is in binary form and is intended to be written to
1501 revlog data directly. So this function needs raw revision data.
1510 revlog data directly. So this function needs raw revision data.
1502 """
1511 """
1503 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1512 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1504 return bytes(self._chunk(rev2))
1513 return bytes(self._chunk(rev2))
1505
1514
1506 return mdiff.textdiff(self.revision(rev1, raw=True),
1515 return mdiff.textdiff(self.revision(rev1, raw=True),
1507 self.revision(rev2, raw=True))
1516 self.revision(rev2, raw=True))
1508
1517
1509 def revision(self, nodeorrev, _df=None, raw=False):
1518 def revision(self, nodeorrev, _df=None, raw=False):
1510 """return an uncompressed revision of a given node or revision
1519 """return an uncompressed revision of a given node or revision
1511 number.
1520 number.
1512
1521
1513 _df - an existing file handle to read from. (internal-only)
1522 _df - an existing file handle to read from. (internal-only)
1514 raw - an optional argument specifying if the revision data is to be
1523 raw - an optional argument specifying if the revision data is to be
1515 treated as raw data when applying flag transforms. 'raw' should be set
1524 treated as raw data when applying flag transforms. 'raw' should be set
1516 to True when generating changegroups or in debug commands.
1525 to True when generating changegroups or in debug commands.
1517 """
1526 """
1518 if isinstance(nodeorrev, int):
1527 if isinstance(nodeorrev, int):
1519 rev = nodeorrev
1528 rev = nodeorrev
1520 node = self.node(rev)
1529 node = self.node(rev)
1521 else:
1530 else:
1522 node = nodeorrev
1531 node = nodeorrev
1523 rev = None
1532 rev = None
1524
1533
1525 cachedrev = None
1534 cachedrev = None
1526 flags = None
1535 flags = None
1527 rawtext = None
1536 rawtext = None
1528 if node == nullid:
1537 if node == nullid:
1529 return ""
1538 return ""
1530 if self._revisioncache:
1539 if self._revisioncache:
1531 if self._revisioncache[0] == node:
1540 if self._revisioncache[0] == node:
1532 # _cache only stores rawtext
1541 # _cache only stores rawtext
1533 if raw:
1542 if raw:
1534 return self._revisioncache[2]
1543 return self._revisioncache[2]
1535 # duplicated, but good for perf
1544 # duplicated, but good for perf
1536 if rev is None:
1545 if rev is None:
1537 rev = self.rev(node)
1546 rev = self.rev(node)
1538 if flags is None:
1547 if flags is None:
1539 flags = self.flags(rev)
1548 flags = self.flags(rev)
1540 # no extra flags set, no flag processor runs, text = rawtext
1549 # no extra flags set, no flag processor runs, text = rawtext
1541 if flags == REVIDX_DEFAULT_FLAGS:
1550 if flags == REVIDX_DEFAULT_FLAGS:
1542 return self._revisioncache[2]
1551 return self._revisioncache[2]
1543 # rawtext is reusable. need to run flag processor
1552 # rawtext is reusable. need to run flag processor
1544 rawtext = self._revisioncache[2]
1553 rawtext = self._revisioncache[2]
1545
1554
1546 cachedrev = self._revisioncache[1]
1555 cachedrev = self._revisioncache[1]
1547
1556
1548 # look up what we need to read
1557 # look up what we need to read
1549 if rawtext is None:
1558 if rawtext is None:
1550 if rev is None:
1559 if rev is None:
1551 rev = self.rev(node)
1560 rev = self.rev(node)
1552
1561
1553 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1562 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1554 if stopped:
1563 if stopped:
1555 rawtext = self._revisioncache[2]
1564 rawtext = self._revisioncache[2]
1556
1565
1557 # drop cache to save memory
1566 # drop cache to save memory
1558 self._revisioncache = None
1567 self._revisioncache = None
1559
1568
1560 targetsize = None
1569 targetsize = None
1561 rawsize = self.index[rev][2]
1570 rawsize = self.index[rev][2]
1562 if 0 <= rawsize:
1571 if 0 <= rawsize:
1563 targetsize = 4 * rawsize
1572 targetsize = 4 * rawsize
1564
1573
1565 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1574 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1566 if rawtext is None:
1575 if rawtext is None:
1567 rawtext = bytes(bins[0])
1576 rawtext = bytes(bins[0])
1568 bins = bins[1:]
1577 bins = bins[1:]
1569
1578
1570 rawtext = mdiff.patches(rawtext, bins)
1579 rawtext = mdiff.patches(rawtext, bins)
1571 self._revisioncache = (node, rev, rawtext)
1580 self._revisioncache = (node, rev, rawtext)
1572
1581
1573 if flags is None:
1582 if flags is None:
1574 if rev is None:
1583 if rev is None:
1575 rev = self.rev(node)
1584 rev = self.rev(node)
1576 flags = self.flags(rev)
1585 flags = self.flags(rev)
1577
1586
1578 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1587 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1579 if validatehash:
1588 if validatehash:
1580 self.checkhash(text, node, rev=rev)
1589 self.checkhash(text, node, rev=rev)
1581
1590
1582 return text
1591 return text
1583
1592
1584 def hash(self, text, p1, p2):
1593 def hash(self, text, p1, p2):
1585 """Compute a node hash.
1594 """Compute a node hash.
1586
1595
1587 Available as a function so that subclasses can replace the hash
1596 Available as a function so that subclasses can replace the hash
1588 as needed.
1597 as needed.
1589 """
1598 """
1590 return storageutil.hashrevisionsha1(text, p1, p2)
1599 return storageutil.hashrevisionsha1(text, p1, p2)
1591
1600
1592 def _processflags(self, text, flags, operation, raw=False):
1601 def _processflags(self, text, flags, operation, raw=False):
1593 """Inspect revision data flags and applies transforms defined by
1602 """Inspect revision data flags and applies transforms defined by
1594 registered flag processors.
1603 registered flag processors.
1595
1604
1596 ``text`` - the revision data to process
1605 ``text`` - the revision data to process
1597 ``flags`` - the revision flags
1606 ``flags`` - the revision flags
1598 ``operation`` - the operation being performed (read or write)
1607 ``operation`` - the operation being performed (read or write)
1599 ``raw`` - an optional argument describing if the raw transform should be
1608 ``raw`` - an optional argument describing if the raw transform should be
1600 applied.
1609 applied.
1601
1610
1602 This method processes the flags in the order (or reverse order if
1611 This method processes the flags in the order (or reverse order if
1603 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1612 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1604 flag processors registered for present flags. The order of flags defined
1613 flag processors registered for present flags. The order of flags defined
1605 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1614 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1606
1615
1607 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1616 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1608 processed text and ``validatehash`` is a bool indicating whether the
1617 processed text and ``validatehash`` is a bool indicating whether the
1609 returned text should be checked for hash integrity.
1618 returned text should be checked for hash integrity.
1610
1619
1611 Note: If the ``raw`` argument is set, it has precedence over the
1620 Note: If the ``raw`` argument is set, it has precedence over the
1612 operation and will only update the value of ``validatehash``.
1621 operation and will only update the value of ``validatehash``.
1613 """
1622 """
1614 # fast path: no flag processors will run
1623 # fast path: no flag processors will run
1615 if flags == 0:
1624 if flags == 0:
1616 return text, True
1625 return text, True
1617 if not operation in ('read', 'write'):
1626 if not operation in ('read', 'write'):
1618 raise error.ProgrammingError(_("invalid '%s' operation") %
1627 raise error.ProgrammingError(_("invalid '%s' operation") %
1619 operation)
1628 operation)
1620 # Check all flags are known.
1629 # Check all flags are known.
1621 if flags & ~REVIDX_KNOWN_FLAGS:
1630 if flags & ~REVIDX_KNOWN_FLAGS:
1622 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1631 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1623 (flags & ~REVIDX_KNOWN_FLAGS))
1632 (flags & ~REVIDX_KNOWN_FLAGS))
1624 validatehash = True
1633 validatehash = True
1625 # Depending on the operation (read or write), the order might be
1634 # Depending on the operation (read or write), the order might be
1626 # reversed due to non-commutative transforms.
1635 # reversed due to non-commutative transforms.
1627 orderedflags = REVIDX_FLAGS_ORDER
1636 orderedflags = REVIDX_FLAGS_ORDER
1628 if operation == 'write':
1637 if operation == 'write':
1629 orderedflags = reversed(orderedflags)
1638 orderedflags = reversed(orderedflags)
1630
1639
1631 for flag in orderedflags:
1640 for flag in orderedflags:
1632 # If a flagprocessor has been registered for a known flag, apply the
1641 # If a flagprocessor has been registered for a known flag, apply the
1633 # related operation transform and update result tuple.
1642 # related operation transform and update result tuple.
1634 if flag & flags:
1643 if flag & flags:
1635 vhash = True
1644 vhash = True
1636
1645
1637 if flag not in self._flagprocessors:
1646 if flag not in self._flagprocessors:
1638 message = _("missing processor for flag '%#x'") % (flag)
1647 message = _("missing processor for flag '%#x'") % (flag)
1639 raise error.RevlogError(message)
1648 raise error.RevlogError(message)
1640
1649
1641 processor = self._flagprocessors[flag]
1650 processor = self._flagprocessors[flag]
1642 if processor is not None:
1651 if processor is not None:
1643 readtransform, writetransform, rawtransform = processor
1652 readtransform, writetransform, rawtransform = processor
1644
1653
1645 if raw:
1654 if raw:
1646 vhash = rawtransform(self, text)
1655 vhash = rawtransform(self, text)
1647 elif operation == 'read':
1656 elif operation == 'read':
1648 text, vhash = readtransform(self, text)
1657 text, vhash = readtransform(self, text)
1649 else: # write operation
1658 else: # write operation
1650 text, vhash = writetransform(self, text)
1659 text, vhash = writetransform(self, text)
1651 validatehash = validatehash and vhash
1660 validatehash = validatehash and vhash
1652
1661
1653 return text, validatehash
1662 return text, validatehash
1654
1663
1655 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1664 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1656 """Check node hash integrity.
1665 """Check node hash integrity.
1657
1666
1658 Available as a function so that subclasses can extend hash mismatch
1667 Available as a function so that subclasses can extend hash mismatch
1659 behaviors as needed.
1668 behaviors as needed.
1660 """
1669 """
1661 try:
1670 try:
1662 if p1 is None and p2 is None:
1671 if p1 is None and p2 is None:
1663 p1, p2 = self.parents(node)
1672 p1, p2 = self.parents(node)
1664 if node != self.hash(text, p1, p2):
1673 if node != self.hash(text, p1, p2):
1665 # Clear the revision cache on hash failure. The revision cache
1674 # Clear the revision cache on hash failure. The revision cache
1666 # only stores the raw revision and clearing the cache does have
1675 # only stores the raw revision and clearing the cache does have
1667 # the side-effect that we won't have a cache hit when the raw
1676 # the side-effect that we won't have a cache hit when the raw
1668 # revision data is accessed. But this case should be rare and
1677 # revision data is accessed. But this case should be rare and
1669 # it is extra work to teach the cache about the hash
1678 # it is extra work to teach the cache about the hash
1670 # verification state.
1679 # verification state.
1671 if self._revisioncache and self._revisioncache[0] == node:
1680 if self._revisioncache and self._revisioncache[0] == node:
1672 self._revisioncache = None
1681 self._revisioncache = None
1673
1682
1674 revornode = rev
1683 revornode = rev
1675 if revornode is None:
1684 if revornode is None:
1676 revornode = templatefilters.short(hex(node))
1685 revornode = templatefilters.short(hex(node))
1677 raise error.RevlogError(_("integrity check failed on %s:%s")
1686 raise error.RevlogError(_("integrity check failed on %s:%s")
1678 % (self.indexfile, pycompat.bytestr(revornode)))
1687 % (self.indexfile, pycompat.bytestr(revornode)))
1679 except error.RevlogError:
1688 except error.RevlogError:
1680 if self._censorable and storageutil.iscensoredtext(text):
1689 if self._censorable and storageutil.iscensoredtext(text):
1681 raise error.CensoredNodeError(self.indexfile, node, text)
1690 raise error.CensoredNodeError(self.indexfile, node, text)
1682 raise
1691 raise
1683
1692
1684 def _enforceinlinesize(self, tr, fp=None):
1693 def _enforceinlinesize(self, tr, fp=None):
1685 """Check if the revlog is too big for inline and convert if so.
1694 """Check if the revlog is too big for inline and convert if so.
1686
1695
1687 This should be called after revisions are added to the revlog. If the
1696 This should be called after revisions are added to the revlog. If the
1688 revlog has grown too large to be an inline revlog, it will convert it
1697 revlog has grown too large to be an inline revlog, it will convert it
1689 to use multiple index and data files.
1698 to use multiple index and data files.
1690 """
1699 """
1691 tiprev = len(self) - 1
1700 tiprev = len(self) - 1
1692 if (not self._inline or
1701 if (not self._inline or
1693 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1702 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1694 return
1703 return
1695
1704
1696 trinfo = tr.find(self.indexfile)
1705 trinfo = tr.find(self.indexfile)
1697 if trinfo is None:
1706 if trinfo is None:
1698 raise error.RevlogError(_("%s not found in the transaction")
1707 raise error.RevlogError(_("%s not found in the transaction")
1699 % self.indexfile)
1708 % self.indexfile)
1700
1709
1701 trindex = trinfo[2]
1710 trindex = trinfo[2]
1702 if trindex is not None:
1711 if trindex is not None:
1703 dataoff = self.start(trindex)
1712 dataoff = self.start(trindex)
1704 else:
1713 else:
1705 # revlog was stripped at start of transaction, use all leftover data
1714 # revlog was stripped at start of transaction, use all leftover data
1706 trindex = len(self) - 1
1715 trindex = len(self) - 1
1707 dataoff = self.end(tiprev)
1716 dataoff = self.end(tiprev)
1708
1717
1709 tr.add(self.datafile, dataoff)
1718 tr.add(self.datafile, dataoff)
1710
1719
1711 if fp:
1720 if fp:
1712 fp.flush()
1721 fp.flush()
1713 fp.close()
1722 fp.close()
1714
1723
1715 with self._datafp('w') as df:
1724 with self._datafp('w') as df:
1716 for r in self:
1725 for r in self:
1717 df.write(self._getsegmentforrevs(r, r)[1])
1726 df.write(self._getsegmentforrevs(r, r)[1])
1718
1727
1719 with self._indexfp('w') as fp:
1728 with self._indexfp('w') as fp:
1720 self.version &= ~FLAG_INLINE_DATA
1729 self.version &= ~FLAG_INLINE_DATA
1721 self._inline = False
1730 self._inline = False
1722 io = self._io
1731 io = self._io
1723 for i in self:
1732 for i in self:
1724 e = io.packentry(self.index[i], self.node, self.version, i)
1733 e = io.packentry(self.index[i], self.node, self.version, i)
1725 fp.write(e)
1734 fp.write(e)
1726
1735
1727 # the temp file replace the real index when we exit the context
1736 # the temp file replace the real index when we exit the context
1728 # manager
1737 # manager
1729
1738
1730 tr.replace(self.indexfile, trindex * self._io.size)
1739 tr.replace(self.indexfile, trindex * self._io.size)
1731 self._chunkclear()
1740 self._chunkclear()
1732
1741
1733 def _nodeduplicatecallback(self, transaction, node):
1742 def _nodeduplicatecallback(self, transaction, node):
1734 """called when trying to add a node already stored.
1743 """called when trying to add a node already stored.
1735 """
1744 """
1736
1745
1737 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1746 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1738 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1747 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1739 """add a revision to the log
1748 """add a revision to the log
1740
1749
1741 text - the revision data to add
1750 text - the revision data to add
1742 transaction - the transaction object used for rollback
1751 transaction - the transaction object used for rollback
1743 link - the linkrev data to add
1752 link - the linkrev data to add
1744 p1, p2 - the parent nodeids of the revision
1753 p1, p2 - the parent nodeids of the revision
1745 cachedelta - an optional precomputed delta
1754 cachedelta - an optional precomputed delta
1746 node - nodeid of revision; typically node is not specified, and it is
1755 node - nodeid of revision; typically node is not specified, and it is
1747 computed by default as hash(text, p1, p2), however subclasses might
1756 computed by default as hash(text, p1, p2), however subclasses might
1748 use different hashing method (and override checkhash() in such case)
1757 use different hashing method (and override checkhash() in such case)
1749 flags - the known flags to set on the revision
1758 flags - the known flags to set on the revision
1750 deltacomputer - an optional deltacomputer instance shared between
1759 deltacomputer - an optional deltacomputer instance shared between
1751 multiple calls
1760 multiple calls
1752 """
1761 """
1753 if link == nullrev:
1762 if link == nullrev:
1754 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1763 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1755 % self.indexfile)
1764 % self.indexfile)
1756
1765
1757 if flags:
1766 if flags:
1758 node = node or self.hash(text, p1, p2)
1767 node = node or self.hash(text, p1, p2)
1759
1768
1760 rawtext, validatehash = self._processflags(text, flags, 'write')
1769 rawtext, validatehash = self._processflags(text, flags, 'write')
1761
1770
1762 # If the flag processor modifies the revision data, ignore any provided
1771 # If the flag processor modifies the revision data, ignore any provided
1763 # cachedelta.
1772 # cachedelta.
1764 if rawtext != text:
1773 if rawtext != text:
1765 cachedelta = None
1774 cachedelta = None
1766
1775
1767 if len(rawtext) > _maxentrysize:
1776 if len(rawtext) > _maxentrysize:
1768 raise error.RevlogError(
1777 raise error.RevlogError(
1769 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1778 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1770 % (self.indexfile, len(rawtext)))
1779 % (self.indexfile, len(rawtext)))
1771
1780
1772 node = node or self.hash(rawtext, p1, p2)
1781 node = node or self.hash(rawtext, p1, p2)
1773 if node in self.nodemap:
1782 if node in self.nodemap:
1774 return node
1783 return node
1775
1784
1776 if validatehash:
1785 if validatehash:
1777 self.checkhash(rawtext, node, p1=p1, p2=p2)
1786 self.checkhash(rawtext, node, p1=p1, p2=p2)
1778
1787
1779 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1788 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1780 flags, cachedelta=cachedelta,
1789 flags, cachedelta=cachedelta,
1781 deltacomputer=deltacomputer)
1790 deltacomputer=deltacomputer)
1782
1791
1783 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1792 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1784 cachedelta=None, deltacomputer=None):
1793 cachedelta=None, deltacomputer=None):
1785 """add a raw revision with known flags, node and parents
1794 """add a raw revision with known flags, node and parents
1786 useful when reusing a revision not stored in this revlog (ex: received
1795 useful when reusing a revision not stored in this revlog (ex: received
1787 over wire, or read from an external bundle).
1796 over wire, or read from an external bundle).
1788 """
1797 """
1789 dfh = None
1798 dfh = None
1790 if not self._inline:
1799 if not self._inline:
1791 dfh = self._datafp("a+")
1800 dfh = self._datafp("a+")
1792 ifh = self._indexfp("a+")
1801 ifh = self._indexfp("a+")
1793 try:
1802 try:
1794 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1803 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1795 flags, cachedelta, ifh, dfh,
1804 flags, cachedelta, ifh, dfh,
1796 deltacomputer=deltacomputer)
1805 deltacomputer=deltacomputer)
1797 finally:
1806 finally:
1798 if dfh:
1807 if dfh:
1799 dfh.close()
1808 dfh.close()
1800 ifh.close()
1809 ifh.close()
1801
1810
1802 def compress(self, data):
1811 def compress(self, data):
1803 """Generate a possibly-compressed representation of data."""
1812 """Generate a possibly-compressed representation of data."""
1804 if not data:
1813 if not data:
1805 return '', data
1814 return '', data
1806
1815
1807 compressed = self._compressor.compress(data)
1816 compressed = self._compressor.compress(data)
1808
1817
1809 if compressed:
1818 if compressed:
1810 # The revlog compressor added the header in the returned data.
1819 # The revlog compressor added the header in the returned data.
1811 return '', compressed
1820 return '', compressed
1812
1821
1813 if data[0:1] == '\0':
1822 if data[0:1] == '\0':
1814 return '', data
1823 return '', data
1815 return 'u', data
1824 return 'u', data
1816
1825
1817 def decompress(self, data):
1826 def decompress(self, data):
1818 """Decompress a revlog chunk.
1827 """Decompress a revlog chunk.
1819
1828
1820 The chunk is expected to begin with a header identifying the
1829 The chunk is expected to begin with a header identifying the
1821 format type so it can be routed to an appropriate decompressor.
1830 format type so it can be routed to an appropriate decompressor.
1822 """
1831 """
1823 if not data:
1832 if not data:
1824 return data
1833 return data
1825
1834
1826 # Revlogs are read much more frequently than they are written and many
1835 # Revlogs are read much more frequently than they are written and many
1827 # chunks only take microseconds to decompress, so performance is
1836 # chunks only take microseconds to decompress, so performance is
1828 # important here.
1837 # important here.
1829 #
1838 #
1830 # We can make a few assumptions about revlogs:
1839 # We can make a few assumptions about revlogs:
1831 #
1840 #
1832 # 1) the majority of chunks will be compressed (as opposed to inline
1841 # 1) the majority of chunks will be compressed (as opposed to inline
1833 # raw data).
1842 # raw data).
1834 # 2) decompressing *any* data will likely by at least 10x slower than
1843 # 2) decompressing *any* data will likely by at least 10x slower than
1835 # returning raw inline data.
1844 # returning raw inline data.
1836 # 3) we want to prioritize common and officially supported compression
1845 # 3) we want to prioritize common and officially supported compression
1837 # engines
1846 # engines
1838 #
1847 #
1839 # It follows that we want to optimize for "decompress compressed data
1848 # It follows that we want to optimize for "decompress compressed data
1840 # when encoded with common and officially supported compression engines"
1849 # when encoded with common and officially supported compression engines"
1841 # case over "raw data" and "data encoded by less common or non-official
1850 # case over "raw data" and "data encoded by less common or non-official
1842 # compression engines." That is why we have the inline lookup first
1851 # compression engines." That is why we have the inline lookup first
1843 # followed by the compengines lookup.
1852 # followed by the compengines lookup.
1844 #
1853 #
1845 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1854 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1846 # compressed chunks. And this matters for changelog and manifest reads.
1855 # compressed chunks. And this matters for changelog and manifest reads.
1847 t = data[0:1]
1856 t = data[0:1]
1848
1857
1849 if t == 'x':
1858 if t == 'x':
1850 try:
1859 try:
1851 return _zlibdecompress(data)
1860 return _zlibdecompress(data)
1852 except zlib.error as e:
1861 except zlib.error as e:
1853 raise error.RevlogError(_('revlog decompress error: %s') %
1862 raise error.RevlogError(_('revlog decompress error: %s') %
1854 stringutil.forcebytestr(e))
1863 stringutil.forcebytestr(e))
1855 # '\0' is more common than 'u' so it goes first.
1864 # '\0' is more common than 'u' so it goes first.
1856 elif t == '\0':
1865 elif t == '\0':
1857 return data
1866 return data
1858 elif t == 'u':
1867 elif t == 'u':
1859 return util.buffer(data, 1)
1868 return util.buffer(data, 1)
1860
1869
1861 try:
1870 try:
1862 compressor = self._decompressors[t]
1871 compressor = self._decompressors[t]
1863 except KeyError:
1872 except KeyError:
1864 try:
1873 try:
1865 engine = util.compengines.forrevlogheader(t)
1874 engine = util.compengines.forrevlogheader(t)
1866 compressor = engine.revlogcompressor()
1875 compressor = engine.revlogcompressor()
1867 self._decompressors[t] = compressor
1876 self._decompressors[t] = compressor
1868 except KeyError:
1877 except KeyError:
1869 raise error.RevlogError(_('unknown compression type %r') % t)
1878 raise error.RevlogError(_('unknown compression type %r') % t)
1870
1879
1871 return compressor.decompress(data)
1880 return compressor.decompress(data)
1872
1881
1873 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1882 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1874 cachedelta, ifh, dfh, alwayscache=False,
1883 cachedelta, ifh, dfh, alwayscache=False,
1875 deltacomputer=None):
1884 deltacomputer=None):
1876 """internal function to add revisions to the log
1885 """internal function to add revisions to the log
1877
1886
1878 see addrevision for argument descriptions.
1887 see addrevision for argument descriptions.
1879
1888
1880 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1889 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1881
1890
1882 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1891 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1883 be used.
1892 be used.
1884
1893
1885 invariants:
1894 invariants:
1886 - rawtext is optional (can be None); if not set, cachedelta must be set.
1895 - rawtext is optional (can be None); if not set, cachedelta must be set.
1887 if both are set, they must correspond to each other.
1896 if both are set, they must correspond to each other.
1888 """
1897 """
1889 if node == nullid:
1898 if node == nullid:
1890 raise error.RevlogError(_("%s: attempt to add null revision") %
1899 raise error.RevlogError(_("%s: attempt to add null revision") %
1891 self.indexfile)
1900 self.indexfile)
1892 if node == wdirid or node in wdirfilenodeids:
1901 if node == wdirid or node in wdirfilenodeids:
1893 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1902 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1894 self.indexfile)
1903 self.indexfile)
1895
1904
1896 if self._inline:
1905 if self._inline:
1897 fh = ifh
1906 fh = ifh
1898 else:
1907 else:
1899 fh = dfh
1908 fh = dfh
1900
1909
1901 btext = [rawtext]
1910 btext = [rawtext]
1902
1911
1903 curr = len(self)
1912 curr = len(self)
1904 prev = curr - 1
1913 prev = curr - 1
1905 offset = self.end(prev)
1914 offset = self.end(prev)
1906 p1r, p2r = self.rev(p1), self.rev(p2)
1915 p1r, p2r = self.rev(p1), self.rev(p2)
1907
1916
1908 # full versions are inserted when the needed deltas
1917 # full versions are inserted when the needed deltas
1909 # become comparable to the uncompressed text
1918 # become comparable to the uncompressed text
1910 if rawtext is None:
1919 if rawtext is None:
1911 # need rawtext size, before changed by flag processors, which is
1920 # need rawtext size, before changed by flag processors, which is
1912 # the non-raw size. use revlog explicitly to avoid filelog's extra
1921 # the non-raw size. use revlog explicitly to avoid filelog's extra
1913 # logic that might remove metadata size.
1922 # logic that might remove metadata size.
1914 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1923 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1915 cachedelta[1])
1924 cachedelta[1])
1916 else:
1925 else:
1917 textlen = len(rawtext)
1926 textlen = len(rawtext)
1918
1927
1919 if deltacomputer is None:
1928 if deltacomputer is None:
1920 deltacomputer = deltautil.deltacomputer(self)
1929 deltacomputer = deltautil.deltacomputer(self)
1921
1930
1922 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1931 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1923
1932
1924 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1933 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1925
1934
1926 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1935 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1927 deltainfo.base, link, p1r, p2r, node)
1936 deltainfo.base, link, p1r, p2r, node)
1928 self.index.append(e)
1937 self.index.append(e)
1929 self.nodemap[node] = curr
1938 self.nodemap[node] = curr
1930
1939
1931 entry = self._io.packentry(e, self.node, self.version, curr)
1940 entry = self._io.packentry(e, self.node, self.version, curr)
1932 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1941 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1933 link, offset)
1942 link, offset)
1934
1943
1935 rawtext = btext[0]
1944 rawtext = btext[0]
1936
1945
1937 if alwayscache and rawtext is None:
1946 if alwayscache and rawtext is None:
1938 rawtext = deltacomputer.buildtext(revinfo, fh)
1947 rawtext = deltacomputer.buildtext(revinfo, fh)
1939
1948
1940 if type(rawtext) == bytes: # only accept immutable objects
1949 if type(rawtext) == bytes: # only accept immutable objects
1941 self._revisioncache = (node, curr, rawtext)
1950 self._revisioncache = (node, curr, rawtext)
1942 self._chainbasecache[curr] = deltainfo.chainbase
1951 self._chainbasecache[curr] = deltainfo.chainbase
1943 return node
1952 return node
1944
1953
1945 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1954 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1946 # Files opened in a+ mode have inconsistent behavior on various
1955 # Files opened in a+ mode have inconsistent behavior on various
1947 # platforms. Windows requires that a file positioning call be made
1956 # platforms. Windows requires that a file positioning call be made
1948 # when the file handle transitions between reads and writes. See
1957 # when the file handle transitions between reads and writes. See
1949 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1958 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1950 # platforms, Python or the platform itself can be buggy. Some versions
1959 # platforms, Python or the platform itself can be buggy. Some versions
1951 # of Solaris have been observed to not append at the end of the file
1960 # of Solaris have been observed to not append at the end of the file
1952 # if the file was seeked to before the end. See issue4943 for more.
1961 # if the file was seeked to before the end. See issue4943 for more.
1953 #
1962 #
1954 # We work around this issue by inserting a seek() before writing.
1963 # We work around this issue by inserting a seek() before writing.
1955 # Note: This is likely not necessary on Python 3.
1964 # Note: This is likely not necessary on Python 3.
1956 ifh.seek(0, os.SEEK_END)
1965 ifh.seek(0, os.SEEK_END)
1957 if dfh:
1966 if dfh:
1958 dfh.seek(0, os.SEEK_END)
1967 dfh.seek(0, os.SEEK_END)
1959
1968
1960 curr = len(self) - 1
1969 curr = len(self) - 1
1961 if not self._inline:
1970 if not self._inline:
1962 transaction.add(self.datafile, offset)
1971 transaction.add(self.datafile, offset)
1963 transaction.add(self.indexfile, curr * len(entry))
1972 transaction.add(self.indexfile, curr * len(entry))
1964 if data[0]:
1973 if data[0]:
1965 dfh.write(data[0])
1974 dfh.write(data[0])
1966 dfh.write(data[1])
1975 dfh.write(data[1])
1967 ifh.write(entry)
1976 ifh.write(entry)
1968 else:
1977 else:
1969 offset += curr * self._io.size
1978 offset += curr * self._io.size
1970 transaction.add(self.indexfile, offset, curr)
1979 transaction.add(self.indexfile, offset, curr)
1971 ifh.write(entry)
1980 ifh.write(entry)
1972 ifh.write(data[0])
1981 ifh.write(data[0])
1973 ifh.write(data[1])
1982 ifh.write(data[1])
1974 self._enforceinlinesize(transaction, ifh)
1983 self._enforceinlinesize(transaction, ifh)
1975
1984
def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    If ``addrevisioncb`` is defined, it will be called with arguments of
    this revlog and the node that was added.

    Each element of ``deltas`` is a 7-tuple of
    (node, p1, p2, linknode, deltabase, delta, flags).  Returns the list
    of nodes that were processed (including ones already present).
    """

    nodes = []

    r = len(self)
    end = 0
    if r:
        end = self.end(r - 1)
    ifh = self._indexfp("a+")
    isize = r * self._io.size
    if self._inline:
        # inline revlogs interleave data in the index file, so the
        # journal entry must cover index entries plus data written so far
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self._datafp("a+")
    def flush():
        # passed to _peek_iscensored so pending writes hit disk before
        # any read-back of base revision data
        if dfh:
            dfh.flush()
        ifh.flush()
    try:
        deltacomputer = deltautil.deltacomputer(self)
        # loop through our set of deltas
        for data in deltas:
            node, p1, p2, linknode, deltabase, delta, flags = data
            link = linkmapper(linknode)
            flags = flags or REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self.nodemap:
                self._nodeduplicatecallback(transaction, node)
                # this can happen if two branches make the same change
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise error.LookupError(p, self.indexfile,
                                            _('unknown parent'))

            if deltabase not in self.nodemap:
                raise error.LookupError(deltabase, self.indexfile,
                                        _('unknown delta base'))

            baserev = self.rev(deltabase)

            if baserev != nullrev and self.iscensored(baserev):
                # if base is censored, delta must be full replacement in a
                # single patch operation
                hlen = struct.calcsize(">lll")
                oldlen = self.rawsize(baserev)
                newlen = len(delta) - hlen
                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(self.indexfile,
                                                  self.node(baserev))

            if not flags and self._peek_iscensored(baserev, delta, flush):
                flags |= REVIDX_ISCENSORED

            # We assume consumers of addrevisioncb will want to retrieve
            # the added revision, which will require a call to
            # revision(). revision() will fast path if there is a cache
            # hit. So, we tell _addrevision() to always cache in this case.
            # We're only using addgroup() in the context of changegroup
            # generation so the revision data can always be handled as raw
            # by the flagprocessor.
            self._addrevision(node, None, transaction, link,
                              p1, p2, flags, (baserev, delta),
                              ifh, dfh,
                              alwayscache=bool(addrevisioncb),
                              deltacomputer=deltacomputer)

            if addrevisioncb:
                addrevisioncb(self, node)

            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self._datafp("a+")
                ifh = self._indexfp("a+")
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return nodes
2074
2083
def iscensored(self, rev):
    """Check if a file revision is censored."""
    # Only revlogs created as censorable can ever carry the flag, so
    # short-circuit for everything else.
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED
    return False
2081
2090
2082 def _peek_iscensored(self, baserev, delta, flush):
2091 def _peek_iscensored(self, baserev, delta, flush):
2083 """Quickly check if a delta produces a censored revision."""
2092 """Quickly check if a delta produces a censored revision."""
2084 if not self._censorable:
2093 if not self._censorable:
2085 return False
2094 return False
2086
2095
2087 # Fragile heuristic: unless new file meta keys are added alphabetically
2096 # Fragile heuristic: unless new file meta keys are added alphabetically
2088 # preceding "censored", all censored revisions are prefixed by
2097 # preceding "censored", all censored revisions are prefixed by
2089 # "\1\ncensored:". A delta producing such a censored revision must be a
2098 # "\1\ncensored:". A delta producing such a censored revision must be a
2090 # full-replacement delta, so we inspect the first and only patch in the
2099 # full-replacement delta, so we inspect the first and only patch in the
2091 # delta for this prefix.
2100 # delta for this prefix.
2092 hlen = struct.calcsize(">lll")
2101 hlen = struct.calcsize(">lll")
2093 if len(delta) <= hlen:
2102 if len(delta) <= hlen:
2094 return False
2103 return False
2095
2104
2096 oldlen = self.rawsize(baserev)
2105 oldlen = self.rawsize(baserev)
2097 newlen = len(delta) - hlen
2106 newlen = len(delta) - hlen
2098 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2107 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2099 return False
2108 return False
2100
2109
2101 add = "\1\ncensored:"
2110 add = "\1\ncensored:"
2102 addlen = len(add)
2111 addlen = len(add)
2103 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2112 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2104
2113
def getstrippoint(self, minlink):
    """find the minimum rev that must be stripped to strip the linkrev

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    # All the real work lives in storageutil; we just feed it this
    # revlog's tip, heads and rev->linkrev/parent accessors.
    tiprev = len(self) - 1
    return storageutil.resolvestripinfo(minlink, tiprev,
                                        self.headrevs(),
                                        self.linkrev,
                                        self.parentrevs)
2114
2123
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.
    """
    if len(self) == 0:
        return

    rev, _ = self.getstrippoint(minlink)
    if rev == len(self):
        # nothing to strip
        return

    # first truncate the files on disk
    end = self.start(rev)
    if not self._inline:
        transaction.add(self.datafile, end)
        end = rev * self._io.size
    else:
        # inline: data follows each index entry, so the truncation point
        # is the data offset plus the size of the surviving index entries
        end += rev * self._io.size

    transaction.add(self.indexfile, end)

    # then reset internal state in memory to forget those revisions
    self._revisioncache = None
    self._chaininfocache = {}
    self._chunkclear()
    for x in pycompat.xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    # NOTE(review): the -1 appears to preserve a sentinel entry at the
    # end of the index — confirm against the index implementation.
    del self.index[rev:-1]
    self._nodepos = None
2155
2164
def checksize(self):
    """Check on-disk file sizes against what the index expects.

    Returns a ``(dd, di)`` tuple where ``dd`` is the number of surplus
    bytes in the data file and ``di`` the number of surplus bytes in the
    index file; both are 0 when everything is consistent.  A missing
    file (ENOENT) is treated as size-consistent rather than an error.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, 2)
            actual = f.tell()
        dd = actual - expected
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        f = self.opener(self.indexfile)
        f.seek(0, 2)
        actual = f.tell()
        f.close()
        s = self._io.size
        # di is whatever doesn't divide evenly into index entries
        i = max(0, actual // s)
        di = actual - (i * s)
        if self._inline:
            # inline: index entries and revision data share one file, so
            # recompute both discrepancies from per-revision lengths
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
2191
2200
def files(self):
    """Return the names of the physical files backing this revlog."""
    if self._inline:
        # inline revlogs store revision data inside the index file
        return [self.indexfile]
    return [self.indexfile, self.datafile]
2197
2206
def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                  assumehaveparentrevisions=False, deltaprevious=False):
    """Emit revision delta objects for ``nodes``.

    Validates ``nodesorder`` and then delegates to
    ``storageutil.emitrevisions`` with this revlog's accessors.
    """
    if nodesorder not in ('nodes', 'storage', None):
        raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                     nodesorder)

    order = nodesorder
    # Without generaldelta, deltas can only be computed against the
    # previous storage revision, so default to storage order.
    if order is None and not self._generaldelta:
        order = 'storage'

    options = dict(
        deltaparentfn=self.deltaparent,
        candeltafn=self.candelta,
        rawsizefn=self.rawsize,
        revdifffn=self.revdiff,
        flagsfn=self.flags,
        sendfulltext=not self._storedeltachains,
        revisiondata=revisiondata,
        assumehaveparentrevisions=assumehaveparentrevisions,
        deltaprevious=deltaprevious,
    )
    return storageutil.emitrevisions(self, nodes, order,
                                     revlogrevisiondelta, **options)
2218
2227
# Delta-reuse policies accepted by clone()'s ``deltareuse`` argument.
# See the clone() docstring for the exact semantics of each mode.
DELTAREUSEALWAYS = 'always'
DELTAREUSESAMEREVS = 'samerevs'
DELTAREUSENEVER = 'never'

# Re-add every revision through the full addrevision() path instead of
# the low-level _addrevision() fast path.
DELTAREUSEFULLADD = 'fulladd'

# The set of all valid ``deltareuse`` values, used for validation.
DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2226
2235
def clone(self, tr, destrevlog, addrevisioncb=None,
          deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument control how deltas from the existing revlog
    are preserved in the destination revlog. The argument can have the
    following values:

    DELTAREUSEALWAYS
      Deltas will always be reused (if possible), even if the destination
      revlog would not select the same revisions for the delta. This is the
      fastest mode of operation.
    DELTAREUSESAMEREVS
      Deltas will be reused if the destination revlog would pick the same
      revisions for the delta. This mode strikes a balance between speed
      and optimization.
    DELTAREUSENEVER
      Deltas will never be reused. This is the slowest mode of execution.
      This mode can be used to recompute deltas (e.g. if the diff/delta
      algorithm changes).

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``deltabothparents`` argument
    controls whether to compute deltas against both parents for merges.
    By default, the current default is used.

    Raises ValueError if ``deltareuse`` is invalid, the destination is
    non-empty, or either revlog has filtered revisions.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

    if len(destrevlog):
        raise ValueError(_('destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_('source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_('destination revlog has filtered revisions'))

    # lazydeltabase controls whether to reuse a cached delta, if possible.
    # Remember the destination's settings so they can be restored below.
    oldlazydeltabase = destrevlog._lazydeltabase
    oldamd = destrevlog._deltabothparents

    try:
        if deltareuse == self.DELTAREUSEALWAYS:
            destrevlog._lazydeltabase = True
        elif deltareuse == self.DELTAREUSESAMEREVS:
            destrevlog._lazydeltabase = False

        destrevlog._deltabothparents = deltabothparents or oldamd

        populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                            self.DELTAREUSESAMEREVS)

        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xffff
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if populatecachedelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            if not cachedelta:
                # no reusable delta; fall back to the full raw text
                rawtext = self.revision(rev, raw=True)


            if deltareuse == self.DELTAREUSEFULLADD:
                destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                       cachedelta=cachedelta,
                                       node=node, flags=flags,
                                       deltacomputer=deltacomputer)
            else:
                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                            p2, flags, cachedelta, ifh, dfh,
                                            deltacomputer=deltacomputer)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)
    finally:
        # always restore the destination's original delta settings
        destrevlog._lazydeltabase = oldlazydeltabase
        destrevlog._deltabothparents = oldamd
2343
2352
def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace the content of ``censornode`` with a censor tombstone.

    The tombstone must be no longer than the data it replaces.  The whole
    revlog is rewritten into temporary files which replace the originals
    when the transaction ``tr`` closes.  Not supported for REVLOGV0.
    """
    if (self.version & 0xFFFF) == REVLOGV0:
        raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                self.version)

    censorrev = self.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    if len(tombstone) > self.rawsize(censorrev):
        raise error.Abort(_('censor tombstone must be no longer than '
                            'censored data'))

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.

    newindexfile = self.indexfile + b'.tmpcensored'
    newdatafile = self.datafile + b'.tmpcensored'

    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog(self.opener, newindexfile, newdatafile,
                   censorable=True)
    newrl.version = self.version
    newrl._generaldelta = self._generaldelta
    newrl._io = self._io

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev == censorrev:
            newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                 p1, p2, censornode, REVIDX_ISCENSORED)

            # a censored revision must be stored as a full snapshot so
            # later revisions never delta against tombstone content
            if newrl.deltaparent(rev) != nullrev:
                raise error.Abort(_('censored revision stored as delta; '
                                    'cannot censor'),
                                  hint=_('censoring of revlogs is not '
                                         'fully implemented; please report '
                                         'this bug'))
            continue

        if self.iscensored(rev):
            if self.deltaparent(rev) != nullrev:
                raise error.Abort(_('cannot censor due to censored '
                                    'revision having delta stored'))
            # copy the already-censored raw chunk verbatim
            rawtext = self._chunk(rev)
        else:
            rawtext = self.revision(rev, raw=True)

        newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                             self.flags(rev))

    # back up the originals so the transaction can roll this back
    tr.addbackup(self.indexfile, location='store')
    if not self._inline:
        tr.addbackup(self.datafile, location='store')

    self.opener.rename(newrl.indexfile, self.indexfile)
    if not self._inline:
        self.opener.rename(newrl.datafile, self.datafile)

    # drop all cached state and reload from the rewritten files
    self.clearcaches()
    self._loadindex(self.version, None)
2407
2416
2408 def verifyintegrity(self, state):
2417 def verifyintegrity(self, state):
2409 """Verifies the integrity of the revlog.
2418 """Verifies the integrity of the revlog.
2410
2419
2411 Yields ``revlogproblem`` instances describing problems that are
2420 Yields ``revlogproblem`` instances describing problems that are
2412 found.
2421 found.
2413 """
2422 """
2414 dd, di = self.checksize()
2423 dd, di = self.checksize()
2415 if dd:
2424 if dd:
2416 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2425 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2417 if di:
2426 if di:
2418 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2427 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2419
2428
2420 version = self.version & 0xFFFF
2429 version = self.version & 0xFFFF
2421
2430
2422 # The verifier tells us what version revlog we should be.
2431 # The verifier tells us what version revlog we should be.
2423 if version != state['expectedversion']:
2432 if version != state['expectedversion']:
2424 yield revlogproblem(
2433 yield revlogproblem(
2425 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2434 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2426 (self.indexfile, version, state['expectedversion']))
2435 (self.indexfile, version, state['expectedversion']))
2427
2436
2428 state['skipread'] = set()
2437 state['skipread'] = set()
2429
2438
2430 for rev in self:
2439 for rev in self:
2431 node = self.node(rev)
2440 node = self.node(rev)
2432
2441
2433 # Verify contents. 4 cases to care about:
2442 # Verify contents. 4 cases to care about:
2434 #
2443 #
2435 # common: the most common case
2444 # common: the most common case
2436 # rename: with a rename
2445 # rename: with a rename
2437 # meta: file content starts with b'\1\n', the metadata
2446 # meta: file content starts with b'\1\n', the metadata
2438 # header defined in filelog.py, but without a rename
2447 # header defined in filelog.py, but without a rename
2439 # ext: content stored externally
2448 # ext: content stored externally
2440 #
2449 #
2441 # More formally, their differences are shown below:
2450 # More formally, their differences are shown below:
2442 #
2451 #
2443 # | common | rename | meta | ext
2452 # | common | rename | meta | ext
2444 # -------------------------------------------------------
2453 # -------------------------------------------------------
2445 # flags() | 0 | 0 | 0 | not 0
2454 # flags() | 0 | 0 | 0 | not 0
2446 # renamed() | False | True | False | ?
2455 # renamed() | False | True | False | ?
2447 # rawtext[0:2]=='\1\n'| False | True | True | ?
2456 # rawtext[0:2]=='\1\n'| False | True | True | ?
2448 #
2457 #
2449 # "rawtext" means the raw text stored in revlog data, which
2458 # "rawtext" means the raw text stored in revlog data, which
2450 # could be retrieved by "revision(rev, raw=True)". "text"
2459 # could be retrieved by "revision(rev, raw=True)". "text"
2451 # mentioned below is "revision(rev, raw=False)".
2460 # mentioned below is "revision(rev, raw=False)".
2452 #
2461 #
2453 # There are 3 different lengths stored physically:
2462 # There are 3 different lengths stored physically:
2454 # 1. L1: rawsize, stored in revlog index
2463 # 1. L1: rawsize, stored in revlog index
2455 # 2. L2: len(rawtext), stored in revlog data
2464 # 2. L2: len(rawtext), stored in revlog data
2456 # 3. L3: len(text), stored in revlog data if flags==0, or
2465 # 3. L3: len(text), stored in revlog data if flags==0, or
2457 # possibly somewhere else if flags!=0
2466 # possibly somewhere else if flags!=0
2458 #
2467 #
2459 # L1 should be equal to L2. L3 could be different from them.
2468 # L1 should be equal to L2. L3 could be different from them.
2460 # "text" may or may not affect commit hash depending on flag
2469 # "text" may or may not affect commit hash depending on flag
2461 # processors (see revlog.addflagprocessor).
2470 # processors (see revlog.addflagprocessor).
2462 #
2471 #
2463 # | common | rename | meta | ext
2472 # | common | rename | meta | ext
2464 # -------------------------------------------------
2473 # -------------------------------------------------
2465 # rawsize() | L1 | L1 | L1 | L1
2474 # rawsize() | L1 | L1 | L1 | L1
2466 # size() | L1 | L2-LM | L1(*) | L1 (?)
2475 # size() | L1 | L2-LM | L1(*) | L1 (?)
2467 # len(rawtext) | L2 | L2 | L2 | L2
2476 # len(rawtext) | L2 | L2 | L2 | L2
2468 # len(text) | L2 | L2 | L2 | L3
2477 # len(text) | L2 | L2 | L2 | L3
2469 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2478 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2470 #
2479 #
2471 # LM: length of metadata, depending on rawtext
2480 # LM: length of metadata, depending on rawtext
2472 # (*): not ideal, see comment in filelog.size
2481 # (*): not ideal, see comment in filelog.size
2473 # (?): could be "- len(meta)" if the resolved content has
2482 # (?): could be "- len(meta)" if the resolved content has
2474 # rename metadata
2483 # rename metadata
2475 #
2484 #
2476 # Checks needed to be done:
2485 # Checks needed to be done:
2477 # 1. length check: L1 == L2, in all cases.
2486 # 1. length check: L1 == L2, in all cases.
2478 # 2. hash check: depending on flag processor, we may need to
2487 # 2. hash check: depending on flag processor, we may need to
2479 # use either "text" (external), or "rawtext" (in revlog).
2488 # use either "text" (external), or "rawtext" (in revlog).
2480
2489
2481 try:
2490 try:
2482 skipflags = state.get('skipflags', 0)
2491 skipflags = state.get('skipflags', 0)
2483 if skipflags:
2492 if skipflags:
2484 skipflags &= self.flags(rev)
2493 skipflags &= self.flags(rev)
2485
2494
2486 if skipflags:
2495 if skipflags:
2487 state['skipread'].add(node)
2496 state['skipread'].add(node)
2488 else:
2497 else:
2489 # Side-effect: read content and verify hash.
2498 # Side-effect: read content and verify hash.
2490 self.revision(node)
2499 self.revision(node)
2491
2500
2492 l1 = self.rawsize(rev)
2501 l1 = self.rawsize(rev)
2493 l2 = len(self.revision(node, raw=True))
2502 l2 = len(self.revision(node, raw=True))
2494
2503
2495 if l1 != l2:
2504 if l1 != l2:
2496 yield revlogproblem(
2505 yield revlogproblem(
2497 error=_('unpacked size is %d, %d expected') % (l2, l1),
2506 error=_('unpacked size is %d, %d expected') % (l2, l1),
2498 node=node)
2507 node=node)
2499
2508
2500 except error.CensoredNodeError:
2509 except error.CensoredNodeError:
2501 if state['erroroncensored']:
2510 if state['erroroncensored']:
2502 yield revlogproblem(error=_('censored file data'),
2511 yield revlogproblem(error=_('censored file data'),
2503 node=node)
2512 node=node)
2504 state['skipread'].add(node)
2513 state['skipread'].add(node)
2505 except Exception as e:
2514 except Exception as e:
2506 yield revlogproblem(
2515 yield revlogproblem(
2507 error=_('unpacking %s: %s') % (short(node),
2516 error=_('unpacking %s: %s') % (short(node),
2508 stringutil.forcebytestr(e)),
2517 stringutil.forcebytestr(e)),
2509 node=node)
2518 node=node)
2510 state['skipread'].add(node)
2519 state['skipread'].add(node)
2511
2520
2512 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2521 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2513 revisionscount=False, trackedsize=False,
2522 revisionscount=False, trackedsize=False,
2514 storedsize=False):
2523 storedsize=False):
2515 d = {}
2524 d = {}
2516
2525
2517 if exclusivefiles:
2526 if exclusivefiles:
2518 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2527 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2519 if not self._inline:
2528 if not self._inline:
2520 d['exclusivefiles'].append((self.opener, self.datafile))
2529 d['exclusivefiles'].append((self.opener, self.datafile))
2521
2530
2522 if sharedfiles:
2531 if sharedfiles:
2523 d['sharedfiles'] = []
2532 d['sharedfiles'] = []
2524
2533
2525 if revisionscount:
2534 if revisionscount:
2526 d['revisionscount'] = len(self)
2535 d['revisionscount'] = len(self)
2527
2536
2528 if trackedsize:
2537 if trackedsize:
2529 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2538 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2530
2539
2531 if storedsize:
2540 if storedsize:
2532 d['storedsize'] = sum(self.opener.stat(path).st_size
2541 d['storedsize'] = sum(self.opener.stat(path).st_size
2533 for path in self.files())
2542 for path in self.files())
2534
2543
2535 return d
2544 return d
General Comments 0
You need to be logged in to leave comments. Login now