revlog: efficient implementation of 'descendant'...
Boris Feld, r38532:6db38c9d (default branch)
@@ -1,2622 +1,2620 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask of flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
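
# Editor's illustration (not part of the original module): ``packmeta`` and
# ``parsemeta`` are inverses. packmeta frames sorted "key: value" lines
# between two "\1\n" markers; parsemeta returns the mapping plus the number
# of bytes the metadata block occupies.
def _example_parsemeta():
    blob = packmeta({'copy': 'a.txt'}, 'file content')
    meta, offset = parsemeta(blob)
    assert meta == {'copy': 'a.txt'}
    assert blob[offset:] == 'file content'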

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
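
# Editor's illustration (hypothetical, not part of the module): a processor
# is the (read, write, raw) triple described above. This sketch registers
# identity transforms on REVIDX_EXTSTORED; a real user such as LFS would
# substitute genuine conversions between stored rawtext and user-visible
# text. It is never called here, since a flag admits only one processor.
def _example_flagprocessor():
    def _read(rl, rawtext):
        return rawtext, True   # text, usable for hash verification
    def _write(rl, text):
        return text, True      # rawtext, usable for hash verification
    def _raw(rl, rawtext):
        return True            # rawtext usable for hash verification
    addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))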

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
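
# Editor's illustration: the first field of an index entry packs the byte
# offset into the high bits and the 16-bit flag field into the low bits;
# getoffset/gettype recover the two halves.
def _example_offset_type():
    q = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(q) == 4096
    assert gettype(q) == REVIDX_ISCENSORED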

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
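
# Editor's illustration: the node is sha1(min(p1, p2) + max(p1, p2) + text).
# Since nullid is twenty NUL bytes it always sorts first, so the cached
# _nullhash fast path above yields the same digest for a single-parent rev.
def _example_hash():
    text, p1 = 'data', '\xaa' * 20
    expected = hashlib.sha1(nullid + p1 + text).digest()
    assert hash(text, p1, nullid) == expected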

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]
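
# Editor's illustration with a hypothetical stand-in for a revlog: only the
# ``length`` accessor is consulted. Trailing zero-length revisions are
# trimmed, but the first revision of the slice is always kept.
def _example_trimchunk():
    class fakerevlog(object):
        _lengths = {0: 10, 1: 0, 2: 7, 3: 0, 4: 0}
        def length(self, rev):
            return self._lengths[rev]
    assert _trimchunk(fakerevlog(), [0, 1, 2, 3, 4], 0) == [0, 1, 2]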

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read at once.
    Assume that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    readdata = deltachainspan = endbyte - startbyte

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
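
# Editor's illustration with a hypothetical stand-in for a revlog: two dense
# groups of revisions separated by a large gap get sliced into two reads
# instead of one long read dominated by unrelated bytes.
def _example_slicechunk():
    class fakerevlog(object):
        _srmingapsize = 10
        _srdensitythreshold = 0.5
        # rev -> (start offset, length); revs 0-1 are adjacent, rev 2 starts
        # after a 1000-byte hole.
        _chunks = {0: (0, 50), 1: (50, 50), 2: (1100, 50)}
        def start(self, rev):
            return self._chunks[rev][0]
        def length(self, rev):
            return self._chunks[rev][1]
    assert list(_slicechunk(fakerevlog(), [0, 1, 2])) == [[0, 1], [2]]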

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are interesting to diff against,
        grouped by level of ease.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._aggressivemergedeltas and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo
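
# Editor's illustration with a hypothetical stand-in for a revlog: with
# generaldelta enabled, no cached delta, and two real parents, the computer
# proposes the closest parent first, then the other parent, then the
# previous revision as a last resort before a fulltext.
def _example_candidaterevs():
    class fakerevlog(object):
        _generaldelta = True
        _lazydeltabase = False
        _aggressivemergedeltas = False
        storedeltachains = True
        def __len__(self):
            return 5            # revisions 0..4 exist; the new rev will be 5
        def rev(self, node):
            return {'p1': 2, 'p2': 3}[node]
    dc = _deltacomputer(fakerevlog())
    groups = list(dc._getcandidaterevs('p1', 'p2', None))
    assert groups == [(3,), (2,), (4,)]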

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
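
# Editor's illustration: each v0 index entry is a fixed 76 bytes, four 4-byte
# big-endian integers followed by three 20-byte nodeids, matching the field
# list in the comment above.
def _example_indexformatv0():
    assert indexformatv0.size == 4 * 4 + 3 * 20 == 76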

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
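
# Editor's illustration: a v1 ("ng") index entry is a fixed 64 bytes: 8
# (offset+flags) + 6 * 4 (lengths, base, link, parent revs) + 20 (nodeid)
# + 12 bytes of padding. For the first entry, packentry splices the 4-byte
# version header over the high bytes of the offset field, which is safe
# because rev 0 always starts at offset 0. The entry values here are
# hypothetical.
def _example_indexformatng():
    assert indexformatng.size == 64
    entry = (offset_type(0, 0), 11, 12, 0, 0, -1, -1, '\x11' * 20)
    packed = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
    assert packed[:4] == versionformat_pack(REVLOG_DEFAULT_VERSION)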

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
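            # shift the boundary by one step so ``stop`` itself is included
            # in either direction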
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            else:
                assert p < len(i)
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]
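        # the first revision of a chain has a base field equal to its own
        # revision number, so the walk above terminates without a sentinel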
885
885
886 self._chainbasecache[rev] = base
886 self._chainbasecache[rev] = base
887 return base
887 return base
888
888
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

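    # Illustrative sketch (hypothetical revisions): with general delta, if
    # rev 5 is stored as a delta against rev 3, and rev 3 against rev 0 (a
    # full snapshot), then _chaininfo(5) returns (2, total), where 2 is the
    # number of deltas to apply and total sums the compressed lengths of
    # both deltas plus the base snapshot.
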
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
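
    # Sketch of how a chain is consumed (this mirrors the revision() read
    # path further down): the chunks for ``chain`` are fetched in one pass
    # and the base text is patched with each following delta, e.g.
    #
    #   chain, stopped = self._deltachain(rev)
    #   bins = self._chunks(chain)
    #   text = mdiff.patches(bytes(bins[0]), bins[1:])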

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

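    # Illustrative usage (hypothetical revlog ``rl`` with linear history
    # 0 <- 1 <- 2):
    #   list(rl.descendants([0])) == [1, 2]   # 0 itself is excluded
    #   list(rl.descendants([2])) == []
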
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

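    # Illustrative note (hypothetical nodes a and b): findmissing([a], [b])
    # yields the nodes of the revset ``::b - ::a``, oldest first, while
    # findmissingrevs is the same computation expressed in revision numbers.
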
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        """True if revision 'end' is a descendant of revision 'start'

        A revision is considered a descendant of itself."""
        if start == nullrev:
            return True
        elif start == end:
            return True
        return start in self._commonancestorsheads(start, end)

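    # Illustrative sketch (hypothetical revlog ``rl`` with linear history
    # 0 <- 1 <- 2):
    #   rl.descendant(0, 2) -> True    # rev 2 descends from rev 0
    #   rl.descendant(2, 0) -> False
    # Testing membership in _commonancestorsheads(start, end) replaces the
    # previous implementation, which scanned every revision yielded by
    # descendants([start]).
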
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

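    # Illustrative sketch (hypothetical DAG with two branches off rev 0,
    # i.e. 0 <- 1 and 0 <- 2): _commonancestorsheads(1, 2) == [0], since
    # rev 0 is the only head among the common ancestors of 1 and 2.
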
    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always
        # be full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

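    # Illustrative usage (hypothetical revlog ``rl`` and identifiers):
    #   rl.lookup('0')        -> binary node of revision 0
    #   rl.lookup('1e4f2a')   -> node whose hex form starts with '1e4f2a',
    #                            or LookupError if unknown or ambiguous
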
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

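    # Illustrative arithmetic (assuming _chunkcachesize = 65536): a read of
    # 100 bytes at offset 70000 rounds down to realoffset = 65536 and reads
    # reallength = 65536 bytes, i.e. the whole cache-aligned window
    # [65536, 131072) is fetched and cached in one go.
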
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

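    # Note (sketch): for inline revlogs the index entries are interleaved
    # with the revision data in a single file, so every revision's data is
    # pushed back by one index-entry size (self._io.size) per preceding
    # revision -- that is what the (rev + 1) * self._io.size terms above
    # account for.
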
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

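    # Illustrative note: when rev2 is already stored as a delta against
    # rev1, the stored chunk holds exactly the wanted delta and is returned
    # after decompression; otherwise both raw texts are materialized and a
    # fresh binary diff is computed with mdiff.textdiff.
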
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

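# Editor's sketch (not part of revlog.py): the single-entry cache fast path of
# revision() modelled as a standalone function. The (node, rev, rawtext) cache
# tuple and the flags/default_flags values are hypothetical stand-ins.
def sketch_cachelookup(cache, node, raw, flags, default_flags=0):
    """Return cached text when it can be served without touching the revlog,
    or None when the caller must reconstruct the text from the delta chain."""
    if not cache or cache[0] != node:
        return None
    if raw:
        # callers asking for raw data get the cached rawtext verbatim
        return cache[2]
    if flags == default_flags:
        # no flag processors apply to this revision, so text == rawtext
        return cache[2]
    # rawtext is reusable, but flag processors still have to run
    return None
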
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

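# Editor's sketch (not part of revlog.py): the conventional Mercurial node
# hash that the module-level hash() used above computes -- SHA-1 over both
# parent nodes in sorted order, followed by the text.
import hashlib

def sketch_nodehash(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

# a root revision hashes against two 20-byte null parents:
# sketch_nodehash(b'hello\n', b'\0' * 20, b'\0' * 20)
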
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

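# Editor's sketch (not part of revlog.py): a minimal standalone model of the
# flag-processor pipeline above. The flag bits, registry and transforms below
# are hypothetical; real processors also receive the revlog and a raw
# transform, and are registered through the module's registration hook.
SKETCH_FLAG_A = 1 << 0
SKETCH_FLAG_B = 1 << 1
SKETCH_FLAGS_ORDER = [SKETCH_FLAG_A, SKETCH_FLAG_B]

# flag -> (readtransform, writetransform)
SKETCH_PROCESSORS = {
    SKETCH_FLAG_A: (lambda t: (t.upper(), True),
                    lambda t: (t.lower(), True)),
}

def sketch_processflags(text, flags, operation):
    validatehash = True
    order = SKETCH_FLAGS_ORDER
    if operation == 'write':
        # write transforms run in reverse so they undo reads in opposite order
        order = reversed(order)
    for flag in order:
        if flag & flags:
            if flag not in SKETCH_PROCESSORS:
                raise ValueError("missing processor for flag %#x" % flag)
            read, write = SKETCH_PROCESSORS[flag]
            transform = read if operation == 'read' else write
            text, vhash = transform(text)
            validatehash = validatehash and vhash
    return text, validatehash

# sketch_processflags('abc', SKETCH_FLAG_A, 'read') == ('ABC', True)
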
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

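# Editor's sketch (not part of revlog.py): the return convention above, with
# zlib standing in for the configured compressor. A chunk is stored either
# compressed (the zlib stream itself starts with 'x'), verbatim when it
# already starts with '\0', or prefixed with 'u' to mark plain text.
import zlib

def sketch_compress(data):
    if not data:
        return b'', data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        # no separate header needed: 'x' is the first byte of a zlib stream
        return b'', compressed
    if data[0:1] == b'\0':
        return b'', data
    return b'u', data
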
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the case of "decompress
        # compressed data encoded with common and officially supported
        # compression engines" over "raw data" and "data encoded by less
        # common or non-official compression engines." That is why we have
        # the inline lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

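# Editor's sketch (not part of revlog.py): the header-byte dispatch above,
# limited to the three built-in cases ('x' zlib, '\0' raw, 'u' uncompressed).
# Third-party engines registered under other header bytes are omitted.
import zlib

def sketch_decompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0':
        # raw chunk that happened to begin with a NUL byte
        return data
    if t == b'u':
        # stored uncompressed; strip the one-byte marker
        return data[1:]
    raise ValueError('unknown compression type %r' % t)

# round trip with sketch_compress above:
# sketch_decompress(b''.join(sketch_compress(b'x' * 100))) == b'x' * 100
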
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = deltainfo.distance # ensure the conditional below passes
        maxdist = max(maxdist, defaultmax)
        if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
            deltainfo.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
            return False

        return True

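# Editor's sketch (not part of revlog.py): the delta-goodness bounds above as
# a standalone predicate over plain numbers. Parameter names mirror the
# deltainfo fields; the sample values at the bottom are made up.
def sketch_isgooddelta(distance, deltalen, compresseddeltalen, chainlen,
                       textlen, maxdeltachainspan=0, maxchainlen=None):
    defaultmax = textlen * 4
    maxdist = maxdeltachainspan or distance  # 0 disables the span bound
    maxdist = max(maxdist, defaultmax)
    if distance > maxdist:
        return False  # reading the chain spans too much of the data file
    if deltalen > textlen:
        return False  # the delta is no smaller than the full text
    if compresseddeltalen > textlen * 2:
        return False  # applying the chain costs more CPU than it saves
    if maxchainlen and chainlen > maxchainlen:
        return False  # chain exceeds the configured length limit
    return True

# sketch_isgooddelta(distance=500, deltalen=40, compresseddeltalen=120,
#                    chainlen=3, textlen=200) == True
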
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

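# Editor's sketch (not part of revlog.py): the shape of one entry in the
# ``deltas`` iterable consumed by addgroup() above, plus a toy linkmapper.
# The node values and the node-to-rev mapping are hypothetical placeholders.
import collections

SketchDelta = collections.namedtuple(
    'SketchDelta', ['node', 'p1', 'p2', 'linknode', 'deltabase',
                    'delta', 'flags'])

def sketch_linkmapper(linknodes):
    # addgroup() receives a callable translating a changelog node into the
    # linkrev to store; here it is modelled as a simple dict lookup
    mapping = {node: rev for rev, node in enumerate(linknodes)}
    return mapping.__getitem__

# each entry unpacks positionally, exactly as addgroup() expects:
# node, p1, p2, linknode, deltabase, delta, flags = SketchDelta(...)
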
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

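# Editor's sketch (not part of revlog.py): the full-replacement heuristic
# above, self-contained. A full-replacement delta is a single patch hunk: a
# ">lll" header (start, end, new length) followed by the replacement text,
# which for a censored revision begins with the "\1\ncensored:" meta key.
import struct

def sketch_peek_iscensored(oldlen, delta):
    hlen = struct.calcsize(">lll")
    if len(delta) <= hlen:
        return False
    newlen = len(delta) - hlen
    # header of a hunk replacing bytes [0, oldlen) with newlen new bytes
    if delta[:hlen] != struct.pack(">lll", 0, oldlen, newlen):
        return False
    add = b"\1\ncensored:"
    return newlen >= len(add) and delta[hlen:hlen + len(add)] == add

# sketch_peek_iscensored(5, struct.pack(">lll", 0, 5, 12) + b"\1\ncensored:x")
# == True
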
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

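# Editor's sketch (not part of revlog.py): the same walk on a toy *linear*
# history, where rev i has parent i - 1 and linkrevs come as a plain list
# (assumed non-empty). In a linear history brokenrevs stays empty; broken
# linkrevs only appear in branchy graphs where low-linkrev revisions sit
# above the strip point.
def sketch_getstrippoint(linkrevs, minlink):
    nullrev = -1
    strippoint = len(linkrevs)
    brokenrevs = set()
    heads = {strippoint - 1: linkrevs[-1]}  # single head: the tip
    futurelarge = set(lr for lr in heads.values() if lr >= minlink)
    while futurelarge:
        strippoint -= 1
        linkrev = heads.pop(strippoint)
        if linkrev < minlink:
            brokenrevs.add(strippoint)
        else:
            futurelarge.remove(linkrev)
        parent = strippoint - 1
        if parent != nullrev:
            heads[parent] = linkrevs[parent]
            if linkrevs[parent] >= minlink:
                futurelarge.add(linkrevs[parent])
    return strippoint, brokenrevs

# sketch_getstrippoint([0, 2, 5, 7], minlink=5) == (2, set())
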
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If it is unset, the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
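
# Editor's sketch (not part of revlog.py): a hedged usage example of clone(),
# e.g. to recompute every delta when changing storage settings. The src, dest
# and tr names are assumed to be an existing revlog, an empty destination
# revlog and an open transaction.
def sketch_recompute_deltas(src, dest, tr):
    def progress(rl, rev, node):
        # invoked once per copied revision
        pass
    src.clone(tr, dest, addrevisioncb=progress,
              deltareuse=src.DELTAREUSENEVER)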