rust-cpython: using the new bindings from Python...
Georges Racinet
r41150:536c8353 default
@@ -1,2607 +1,2617 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
+try:
+    from . import rustext
+    rustext.__name__ # force actual import (see hgdemandimport)
+except ImportError:
+    rustext = None

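# (Editor's note, illustrative only -- not part of the changeset.) Under
# Mercurial's hgdemandimport, 'from . import rustext' would normally bind a
# lazy proxy and defer the real import; touching an attribute such as
# __name__ forces the module to load immediately, so a missing or broken
# rustext extension module becomes 'rustext = None' here instead of an
# ImportError at first use. Callers (see ancestors() below) then simply
# test 'rustext is not None'.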
# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor
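
# (Editor's note, illustrative only -- the 'dummy*' processors are invented
# names, not part of this file.) An extension following the (read, write,
# raw) contract documented in addflagprocessor() above could register on a
# known flag like this:
#
#     def dummyread(rl, rawtext):
#         return rawtext, True       # text usable for hash checks
#     def dummywrite(rl, text):
#         return text, True          # rawtext usable for hash checks
#     def dummyraw(rl, rawtext):
#         return True
#     addflagprocessor(REVIDX_EXTSTORED, (dummyread, dummywrite, dummyraw))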

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
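
# (Editor's note, illustrative only.) A quick round-trip through the helpers
# above: the first index field keeps a 48-bit byte offset in its high bits
# and 16 flag bits in its low bits.
#
#     q = offset_type(1024, 0)
#     assert q == 1024 << 16
#     assert getoffset(q) == 1024
#     assert gettype(q) == 0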

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
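
# (Editor's note, illustrative only.) The v0 record size follows directly
# from the layout above: four 4-byte big-endian integers plus three 20-byte
# nodeids.
#
#     assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per entry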

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
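
# (Editor's note, illustrative only.) The "ng" record is 64 bytes: one
# 8-byte offset/flags word (built with offset_type() above), six 4-byte
# integers, a 20-byte nodeid, and 12 pad bytes reserving room for 32-byte
# hashes.
#
#     assert indexformatng.size == 8 + 6 * 4 + 20 + 12  # 64 bytes per entry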

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

            # revlog v0 doesn't have flag processors
            for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
                _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        self._loadindex(v, mmapindexthreshold)

    def _loadindex(self, v, mmapindexthreshold):
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
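
    # (Editor's note, illustrative worked example -- the revs are invented.)
    # With generaldelta off and rev 2 a full snapshot (its base is itself),
    # a delta chain 2 <- 3 <- 4 <- 5 gives:
    #
    #     self._deltachain(5)             # -> ([2, 3, 4, 5], False)
    #     self._deltachain(5, stoprev=3)  # -> ([4, 5], True)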

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well
-        if util.safehasattr(parsers, 'rustlazyancestors'):
-            return ancestor.rustlazyancestors(
-                self.index, revs,
-                stoprev=stoprev, inclusive=inclusive)
-        return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
-                                      stoprev=stoprev, inclusive=inclusive)
+
+        if rustext is not None:
+            lazyancestors = rustext.ancestor.LazyAncestors
+            arg = self.index
+        elif util.safehasattr(parsers, 'rustlazyancestors'):
+            lazyancestors = ancestor.rustlazyancestors
+            arg = self.index
+        else:
+            lazyancestors = ancestor.lazyancestors
+            arg = self._uncheckedparentrevs
+        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
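
    # (Editor's note, illustrative only -- not part of the changeset.) All
    # three branches above share one call shape, which is what lets the
    # class from the new rust-cpython bindings drop in transparently. When
    # the rustext module is built, the selection is equivalent to:
    #
    #     aiter = rustext.ancestor.LazyAncestors(
    #         self.index, revs, stoprev=stoprev, inclusive=inclusive)
    #
    # and the returned object is then iterated the same way as the
    # pure-Python ancestor.lazyancestors result.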
788
798
789 def descendants(self, revs):
799 def descendants(self, revs):
790 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
800 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
791
801
792 def findcommonmissing(self, common=None, heads=None):
802 def findcommonmissing(self, common=None, heads=None):
793 """Return a tuple of the ancestors of common and the ancestors of heads
803 """Return a tuple of the ancestors of common and the ancestors of heads
794 that are not ancestors of common. In revset terminology, we return the
804 that are not ancestors of common. In revset terminology, we return the
795 tuple:
805 tuple:
796
806
797 ::common, (::heads) - (::common)
807 ::common, (::heads) - (::common)
798
808
799 The list is sorted by revision number, meaning it is
809 The list is sorted by revision number, meaning it is
800 topologically sorted.
810 topologically sorted.
801
811
802 'heads' and 'common' are both lists of node IDs. If heads is
812 'heads' and 'common' are both lists of node IDs. If heads is
803 not supplied, uses all of the revlog's heads. If common is not
813 not supplied, uses all of the revlog's heads. If common is not
804 supplied, uses nullid."""
814 supplied, uses nullid."""
805 if common is None:
815 if common is None:
806 common = [nullid]
816 common = [nullid]
807 if heads is None:
817 if heads is None:
808 heads = self.heads()
818 heads = self.heads()
809
819
810 common = [self.rev(n) for n in common]
820 common = [self.rev(n) for n in common]
811 heads = [self.rev(n) for n in heads]
821 heads = [self.rev(n) for n in heads]
812
822
813 # we want the ancestors, but inclusive
823 # we want the ancestors, but inclusive
814 class lazyset(object):
824 class lazyset(object):
815 def __init__(self, lazyvalues):
825 def __init__(self, lazyvalues):
816 self.addedvalues = set()
826 self.addedvalues = set()
817 self.lazyvalues = lazyvalues
827 self.lazyvalues = lazyvalues
818
828
819 def __contains__(self, value):
829 def __contains__(self, value):
820 return value in self.addedvalues or value in self.lazyvalues
830 return value in self.addedvalues or value in self.lazyvalues
821
831
822 def __iter__(self):
832 def __iter__(self):
823 added = self.addedvalues
833 added = self.addedvalues
824 for r in added:
834 for r in added:
825 yield r
835 yield r
826 for r in self.lazyvalues:
836 for r in self.lazyvalues:
827 if not r in added:
837 if not r in added:
828 yield r
838 yield r
829
839
830 def add(self, value):
840 def add(self, value):
831 self.addedvalues.add(value)
841 self.addedvalues.add(value)
832
842
833 def update(self, values):
843 def update(self, values):
834 self.addedvalues.update(values)
844 self.addedvalues.update(values)
835
845
836 has = lazyset(self.ancestors(common))
846 has = lazyset(self.ancestors(common))
837 has.add(nullrev)
847 has.add(nullrev)
838 has.update(common)
848 has.update(common)
839
849
840 # take all ancestors from heads that aren't in has
850 # take all ancestors from heads that aren't in has
841 missing = set()
851 missing = set()
842 visit = collections.deque(r for r in heads if r not in has)
852 visit = collections.deque(r for r in heads if r not in has)
843 while visit:
853 while visit:
844 r = visit.popleft()
854 r = visit.popleft()
845 if r in missing:
855 if r in missing:
846 continue
856 continue
847 else:
857 else:
848 missing.add(r)
858 missing.add(r)
849 for p in self.parentrevs(r):
859 for p in self.parentrevs(r):
850 if p not in has:
860 if p not in has:
851 visit.append(p)
861 visit.append(p)
852 missing = list(missing)
862 missing = list(missing)
853 missing.sort()
863 missing.sort()
854 return has, [self.node(miss) for miss in missing]
864 return has, [self.node(miss) for miss in missing]
855
865
856 def incrementalmissingrevs(self, common=None):
866 def incrementalmissingrevs(self, common=None):
857 """Return an object that can be used to incrementally compute the
867 """Return an object that can be used to incrementally compute the
858 revision numbers of the ancestors of arbitrary sets that are not
868 revision numbers of the ancestors of arbitrary sets that are not
859 ancestors of common. This is an ancestor.incrementalmissingancestors
869 ancestors of common. This is an ancestor.incrementalmissingancestors
860 object.
870 object.
861
871
862 'common' is a list of revision numbers. If common is not supplied, uses
872 'common' is a list of revision numbers. If common is not supplied, uses
863 nullrev.
873 nullrev.
864 """
874 """
865 if common is None:
875 if common is None:
866 common = [nullrev]
876 common = [nullrev]
867
877
868 return ancestor.incrementalmissingancestors(self.parentrevs, common)
878 return ancestor.incrementalmissingancestors(self.parentrevs, common)
869
879
870 def findmissingrevs(self, common=None, heads=None):
880 def findmissingrevs(self, common=None, heads=None):
871 """Return the revision numbers of the ancestors of heads that
881 """Return the revision numbers of the ancestors of heads that
872 are not ancestors of common.
882 are not ancestors of common.
873
883
874 More specifically, return a list of revision numbers corresponding to
884 More specifically, return a list of revision numbers corresponding to
875 nodes N such that every N satisfies the following constraints:
885 nodes N such that every N satisfies the following constraints:
876
886
877 1. N is an ancestor of some node in 'heads'
887 1. N is an ancestor of some node in 'heads'
878 2. N is not an ancestor of any node in 'common'
888 2. N is not an ancestor of any node in 'common'
879
889
880 The list is sorted by revision number, meaning it is
890 The list is sorted by revision number, meaning it is
881 topologically sorted.
891 topologically sorted.
882
892
883 'heads' and 'common' are both lists of revision numbers. If heads is
893 'heads' and 'common' are both lists of revision numbers. If heads is
884 not supplied, uses all of the revlog's heads. If common is not
894 not supplied, uses all of the revlog's heads. If common is not
885 supplied, uses nullid."""
895 supplied, uses nullid."""
886 if common is None:
896 if common is None:
887 common = [nullrev]
897 common = [nullrev]
888 if heads is None:
898 if heads is None:
889 heads = self.headrevs()
899 heads = self.headrevs()
890
900
891 inc = self.incrementalmissingrevs(common=common)
901 inc = self.incrementalmissingrevs(common=common)
892 return inc.missingancestors(heads)
902 return inc.missingancestors(heads)
893
903
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

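    # Illustrative sketch (not from the original source): 'findmissing' is
    # typically used during discovery to compute what a peer lacks, e.g.,
    # assuming 'cl' is a changelog (a revlog subclass) and the given nodes
    # exist:
    #
    #   missing = cl.findmissing(common=[commonnode], heads=[remotehead])
    #   # 'missing' lists node IDs to bundle, in topological order.
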
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots? Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

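    # Illustrative sketch (not from the original source): the returned tuple
    # unpacks as below, assuming 'rl' is a revlog and 'a'/'b' are node IDs
    # with 'a' an ancestor of 'b'. Since parents always have lower revision
    # numbers than children, the topological ordering puts 'a' first and 'b'
    # last:
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[b])
    #   assert nodes[0] == a and nodes[-1] == b
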
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

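    # Illustrative sketch (not from the original source), assuming 'rl' is a
    # revlog and 'n' an existing node ID:
    #
    #   rl.heads()          # all childless nodes in the revlog
    #   rl.heads(start=n)   # only heads that are descendants of n
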
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

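    # Illustrative sketch (not from the original source): because a parent
    # always has a lower revision number than its children, 'children' only
    # needs to scan revisions after rev(node):
    #
    #   kids = rl.children(n)   # nodes whose parents include n
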
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

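    # Illustrative sketch (not from the original source): the revision-number
    # ordering gives the cheap negative case, assuming revs 'a' and 'b':
    #
    #   rl.isancestorrev(a, a)   # True: a rev is its own ancestor
    #   rl.isancestorrev(b, a)   # False whenever b > a, no DAG walk needed
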
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

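    # Illustrative sketch (not from the original source): when several
    # candidate ancestors tie, taking 'min' over the binary node IDs picks a
    # stable winner, so repeated merges resolve identically:
    #
    #   base = rl.ancestor(n1, n2)   # nullid if n1 and n2 are unrelated
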
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

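    # Illustrative sketch (not from the original source): all of these forms
    # resolve to the same node, assuming rev 5 exists and has the given
    # (hypothetical) hex prefix:
    #
    #   rl.lookup(5)            # integer revision
    #   rl.lookup('5')          # str(revision)
    #   rl.lookup('1e4f2a8b')   # unambiguous hex nodeid prefix
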
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

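    # Illustrative sketch (not from the original source): 'shortest' is the
    # inverse convenience of prefix lookup, assuming node 'n' exists:
    #
    #   prefix = rl.shortest(n, minlength=4)
    #   assert rl.lookup(prefix) == n   # prefix is unambiguous by construction
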
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

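    # Illustrative sketch (not from the original source): worked example of
    # the window arithmetic above, assuming a power-of-two cache size of
    # 65536 and a 100-byte read at offset 70000:
    #
    #   realoffset = 70000 & ~(65536 - 1)                     # == 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 # == 65536
    #
    # so one aligned 64KiB read covers offsets 65536..131071, which contains
    # the requested 70000..70099 range plus data on both sides.
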
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

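    # Illustrative sketch (not from the original source): the two delta
    # regimes, for some rev 'r' that is not a full snapshot:
    #
    #   rl.deltaparent(r) == r - 1   # classic revlog: delta against the
    #                                # previous revision
    #   rl.deltaparent(r)            # generaldelta: whatever base was chosen
    #                                # at write time (often a parent of r)
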
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

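    # Illustrative sketch (not from the original source), assuming 'n' names
    # a revision carrying an extra flag (e.g. censored or externally stored):
    #
    #   rl.revision(n)            # fulltext after flag processors have run
    #   rl.revision(n, raw=True)  # stored rawtext, e.g. for changegroups
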
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

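    # Illustrative sketch (not from the original source): each registered
    # processor is a (readtransform, writetransform, rawtransform) triple.
    # A hypothetical no-op processor for REVIDX_EXTSTORED could look like:
    #
    #   def _read(rl, text):
    #       return text, True    # (possibly transformed text, vhash)
    #   def _write(rl, text):
    #       return text, True
    #   def _raw(rl, text):
    #       return True          # vhash only; text passes through untouched
    #   revlog.addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))
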
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

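    # Illustrative sketch of the threshold test above (``_maxinline`` is the
    # module-level inline size cap; ``rl`` is a hypothetical open revlog and
    # ``tr`` an active transaction):
    #
    #   tiprev = len(rl) - 1
    #   if rl._inline and rl.start(tiprev) + rl.length(tiprev) >= _maxinline:
    #       # data outgrew the combined .i file: split it into .i + .d
    #       rl._enforceinlinesize(tr)
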
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

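    # Hypothetical usage sketch (``rl`` is an open revlog and ``tr`` an
    # active transaction; linkrev 0 is assumed for brevity):
    #
    #   node = rl.addrevision(b'file content', tr, 0, nullid, nullid)
    #   assert rl.revision(node) == b'file content'
    #   # re-adding identical content is a no-op returning the same node
    #   assert rl.addrevision(b'file content', tr, 0, nullid, nullid) == node
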
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

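    # Round-trip sketch: compress() returns a (header, data) pair whose
    # concatenation is what gets stored, and decompress() dispatches on the
    # first byte of the stored chunk ('x' = zlib, 'u' = stored verbatim,
    # '\0' = raw data that needs no marker). Hypothetical usage:
    #
    #   header, data = rl.compress(b'some revision text')
    #   chunk = header + data            # bytes as written to .i/.d
    #   assert bytes(rl.decompress(chunk)) == b'some revision text'
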
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

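    # For reference, the 8-tuple appended to self.index above is laid out as:
    #
    #   (offset_type(offset, flags),  # data offset combined with flag bits
    #    deltainfo.deltalen,          # length of the stored (delta) chunk
    #    textlen,                     # uncompressed rawtext length
    #    deltainfo.base,              # revision the delta chains from
    #    link,                        # linkrev into the changelog
    #    p1r, p2r,                    # parent revision numbers
    #    node)                        # 20-byte node id
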
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

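    # Each item yielded by ``deltas`` above is a 7-tuple; a hypothetical
    # producer would emit entries shaped like:
    #
    #   (node,       # node id of the revision being added
    #    p1, p2,     # parent node ids
    #    linknode,   # changelog node this revision links to
    #    deltabase,  # node the delta was computed against
    #    delta,      # the binary delta itself
    #    flags)      # revision flags, or 0 for the defaults
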
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

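    # The truncation offsets above, illustrated (a sketch; ``rl`` is a
    # hypothetical open revlog and ``rev`` the first revision to drop):
    #
    #   entrysize = rl._io.size            # 64 bytes per entry in revlog v1
    #   if rl._inline:
    #       # index and data share the .i file, so both terms contribute
    #       index_cut = rl.start(rev) + rev * entrysize
    #   else:
    #       data_cut = rl.start(rev)       # truncate .d here
    #       index_cut = rev * entrysize    # truncate .i here
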
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
                dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If None, the destination revlog's existing
        setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

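    # Hypothetical usage sketch (``src`` and ``dst`` are revlogs over the
    # same opener, ``tr`` an active transaction); force every delta to be
    # recomputed in the destination:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #   assert len(dst) == len(src)
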
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex(self.version, None)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

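    # Consumption sketch: verifyintegrity() is a generator, so a hypothetical
    # verifier drives it like this (``state`` keys match the checks above;
    # ``ui`` is an assumed output object):
    #
    #   state = {'expectedversion': 1, 'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       ui.warn(problem.error or problem.warning)
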
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
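
    # Shape of the dictionary returned above, with illustrative values:
    #
    #   rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # -> {'revisionscount': 42, 'trackedsize': 12345}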