revlog: early return in _slicechunk when span is already small enough...

Boris Feld
r38654:d083ae26 default
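The hunk below adds an early return at the top of _slicechunk: when the whole
delta chain span (endbyte - startbyte) is already smaller than the sparse-read
minimum gap size, no gap inside it can be large enough to slice on, so the revs
are yielded as a single read group before any density computation or heap work.
A minimal, runnable sketch of that short-circuit, using a hypothetical
FakeRevlog stand-in (only start/length and the two sparse-read knobs of the
real revlog are modeled; the byte layout is made up for illustration):

    class FakeRevlog(object):
        # defaults mirror revlog.__init__ in this file
        _srmingapsize = 262144
        _srdensitythreshold = 0.50

        def __init__(self, chunks):
            self._chunks = chunks  # list of (startbyte, length) per rev

        def start(self, rev):
            return self._chunks[rev][0]

        def length(self, rev):
            return self._chunks[rev][1]

    rl = FakeRevlog([(0, 1000), (1000, 500), (1500, 2000)])
    revs = [0, 1, 2]
    span = rl.start(revs[-1]) + rl.length(revs[-1]) - rl.start(revs[0])
    # span is 3500 bytes, far below _srmingapsize (262144), so the patched
    # _slicechunk now yields [0, 1, 2] as one chunk and returns immediately,
    # skipping the density math and the gaps heap entirely.
    assert span < rl._srmingapsize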
@@ -1,2625 +1,2629 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 from __future__ import absolute_import
 
 import collections
 import contextlib
 import errno
 import hashlib
 import heapq
 import os
 import re
 import struct
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullid,
     nullrev,
     wdirfilenodeids,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from .thirdparty import (
     attr,
 )
 from . import (
     ancestor,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )
 from .utils import (
     stringutil,
 )
 
 parsers = policy.importmod(r'parsers')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # revlog header flags
 REVLOGV0 = 0
 REVLOGV1 = 1
 # Dummy value until file format is finalized.
 # Reminder: change the bounds check in revlog.__init__ when this is changed.
 REVLOGV2 = 0xDEAD
 FLAG_INLINE_DATA = (1 << 16)
 FLAG_GENERALDELTA = (1 << 17)
 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
 REVLOG_DEFAULT_FORMAT = REVLOGV1
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
 REVLOGV2_FLAGS = REVLOGV1_FLAGS
 
 # revlog index flags
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
 REVIDX_DEFAULT_FLAGS = 0
 # stable order in which flags need to be processed and their processors applied
 REVIDX_FLAGS_ORDER = [
     REVIDX_ISCENSORED,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
 ]
 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
 # bitmark for flags that could cause rawdata content change
 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
 
 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576
 
 RevlogError = error.RevlogError
 LookupError = error.LookupError
 CensoredNodeError = error.CensoredNodeError
 ProgrammingError = error.ProgrammingError
 
 # Store flag processors (cf. 'addflagprocessor()' to register)
 _flagprocessors = {
     REVIDX_ISCENSORED: None,
 }
 
 _mdre = re.compile('\1\n')
 def parsemeta(text):
     """return (metadatadict, metadatasize)"""
     # text can be buffer, so we can't use .startswith or .index
     if text[:2] != '\1\n':
         return None, None
     s = _mdre.search(text, 2).start()
     mtext = text[2:s]
     meta = {}
     for l in mtext.splitlines():
         k, v = l.split(": ", 1)
         meta[k] = v
     return meta, (s + 2)
 
 def packmeta(meta, text):
     keys = sorted(meta)
     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
     return "\1\n%s\1\n%s" % (metatext, text)
 
 def _censoredtext(text):
     m, offs = parsemeta(text)
     return m and "censored" in m
 
 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.
 
     Invariant:
     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
       and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
     - Only one flag processor can be registered on a specific flag.
     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
       following signatures:
           - (read)  f(self, rawtext) -> text, bool
           - (write) f(self, text) -> rawtext, bool
           - (raw)   f(self, rawtext) -> bool
       "text" is presented to the user. "rawtext" is stored in revlog data, not
       directly visible to the user.
       The boolean returned by these transforms is used to determine whether
       the returned text can be used for hash integrity checking. For example,
       if "write" returns False, then "text" is used to generate hash. If
       "write" returns True, that basically means "rawtext" returned by "write"
       should be used to generate hash. Usually, "write" and "read" return
       different booleans. And "raw" returns a same boolean as "write".
 
     Note: The 'raw' transform is used for changegroup generation and in some
     debug commands. In this case the transform only indicates whether the
     contents can be used for hash integrity checks.
     """
     if not flag & REVIDX_KNOWN_FLAGS:
         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
         raise ProgrammingError(msg)
     if flag not in REVIDX_FLAGS_ORDER:
         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
         raise ProgrammingError(msg)
     if flag in _flagprocessors:
         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
         raise error.Abort(msg)
     _flagprocessors[flag] = processor
 
 def getoffset(q):
     return int(q >> 16)
 
 def gettype(q):
     return int(q & 0xFFFF)
 
 def offset_type(offset, type):
     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)
 
 _nullhash = hashlib.sha1(nullid)
 
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes
 
     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = _nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         if p1 < p2:
             a = p1
             b = p2
         else:
             a = p2
             b = p1
         s = hashlib.sha1(a)
         s.update(b)
     s.update(text)
     return s.digest()
 
 def _trimchunk(revlog, revs, startidx, endidx=None):
     """returns revs[startidx:endidx] without empty trailing revs
     """
     length = revlog.length
 
     if endidx is None:
         endidx = len(revs)
 
     # Trim empty revs at the end, but never the very first revision of a chain
     while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
         endidx -= 1
 
     return revs[startidx:endidx]
 
 def _slicechunk(revlog, revs):
     """slice revs to reduce the amount of unrelated data to be read from disk.
 
     ``revs`` is sliced into groups that should be read in one time.
     Assume that revs are sorted.
     """
     start = revlog.start
     length = revlog.length
 
     if len(revs) <= 1:
         yield revs
         return
 
     startbyte = start(revs[0])
     endbyte = start(revs[-1]) + length(revs[-1])
     readdata = deltachainspan = endbyte - startbyte
 
+    if deltachainspan < revlog._srmingapsize:
+        yield revs
+        return
+
     chainpayload = sum(length(r) for r in revs)
 
     if deltachainspan:
         density = chainpayload / float(deltachainspan)
     else:
         density = 1.0
 
     if density >= revlog._srdensitythreshold:
         yield revs
         return
 
     # Store the gaps in a heap to have them sorted by decreasing size
     gapsheap = []
     heapq.heapify(gapsheap)
     prevend = None
     for i, rev in enumerate(revs):
         revstart = start(rev)
         revlen = length(rev)
 
         # Skip empty revisions to form larger holes
         if revlen == 0:
             continue
 
         if prevend is not None:
             gapsize = revstart - prevend
             # only consider holes that are large enough
             if gapsize > revlog._srmingapsize:
                 heapq.heappush(gapsheap, (-gapsize, i))
 
         prevend = revstart + revlen
 
     # Collect the indices of the largest holes until the density is acceptable
     indicesheap = []
     heapq.heapify(indicesheap)
     while gapsheap and density < revlog._srdensitythreshold:
         oppgapsize, gapidx = heapq.heappop(gapsheap)
 
         heapq.heappush(indicesheap, gapidx)
 
         # the gap sizes are stored as negatives to be sorted decreasingly
         # by the heap
         readdata -= (-oppgapsize)
         if readdata > 0:
             density = chainpayload / float(readdata)
         else:
             density = 1.0
 
     # Cut the revs at collected indices
     previdx = 0
     while indicesheap:
         idx = heapq.heappop(indicesheap)
 
         chunk = _trimchunk(revlog, revs, previdx, idx)
         if chunk:
             yield chunk
 
         previdx = idx
 
     chunk = _trimchunk(revlog, revs, previdx)
     if chunk:
         yield chunk
 
 @attr.s(slots=True, frozen=True)
 class _deltainfo(object):
     distance = attr.ib()
     deltalen = attr.ib()
     data = attr.ib()
     base = attr.ib()
     chainbase = attr.ib()
     chainlen = attr.ib()
     compresseddeltalen = attr.ib()
 
 class _deltacomputer(object):
     def __init__(self, revlog):
         self.revlog = revlog
 
     def _getcandidaterevs(self, p1, p2, cachedelta):
         """
         Provides revisions that present an interest to be diffed against,
         grouped by level of easiness.
         """
         revlog = self.revlog
         gdelta = revlog._generaldelta
         curr = len(revlog)
         prev = curr - 1
         p1r, p2r = revlog.rev(p1), revlog.rev(p2)
 
         # should we try to build a delta?
         if prev != nullrev and revlog.storedeltachains:
             tested = set()
             # This condition is true most of the time when processing
             # changegroup data into a generaldelta repo. The only time it
             # isn't true is if this is the first revision in a delta chain
             # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
             if cachedelta and gdelta and revlog._lazydeltabase:
                 # Assume what we received from the server is a good choice
                 # build delta will reuse the cache
                 yield (cachedelta[0],)
                 tested.add(cachedelta[0])
 
             if gdelta:
                 # exclude already lazy tested base if any
                 parents = [p for p in (p1r, p2r)
                            if p != nullrev and p not in tested]
 
                 if not revlog._aggressivemergedeltas and len(parents) == 2:
                     parents.sort()
                     # To minimize the chance of having to build a fulltext,
                     # pick first whichever parent is closest to us (max rev)
                     yield (parents[1],)
                     # then the other one (min rev) if the first did not fit
                     yield (parents[0],)
                     tested.update(parents)
                 elif len(parents) > 0:
                     # Test all parents (1 or 2), and keep the best candidate
                     yield parents
                     tested.update(parents)
 
             if prev not in tested:
                 # other approach failed try against prev to hopefully save us a
                 # fulltext.
                 yield (prev,)
                 tested.add(prev)
 
     def buildtext(self, revinfo, fh):
         """Builds a fulltext version of a revision
 
         revinfo: _revisioninfo instance that contains all needed info
         fh:      file handle to either the .i or the .d revlog file,
                  depending on whether it is inlined or not
         """
         btext = revinfo.btext
         if btext[0] is not None:
             return btext[0]
 
         revlog = self.revlog
         cachedelta = revinfo.cachedelta
         flags = revinfo.flags
         node = revinfo.node
 
         baserev = cachedelta[0]
         delta = cachedelta[1]
         # special case deltas which replace entire base; no need to decode
         # base revision. this neatly avoids censored bases, which throw when
         # they're decoded.
         hlen = struct.calcsize(">lll")
         if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                    len(delta) - hlen):
             btext[0] = delta[hlen:]
         else:
             # deltabase is rawtext before changed by flag processors, which is
             # equivalent to non-raw text
             basetext = revlog.revision(baserev, _df=fh, raw=False)
             btext[0] = mdiff.patch(basetext, delta)
 
         try:
             res = revlog._processflags(btext[0], flags, 'read', raw=True)
             btext[0], validatehash = res
             if validatehash:
                 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
             if flags & REVIDX_ISCENSORED:
                 raise RevlogError(_('node %s is not censored') % node)
         except CensoredNodeError:
             # must pass the censored index flag to add censored revisions
             if not flags & REVIDX_ISCENSORED:
                 raise
         return btext[0]
 
     def _builddeltadiff(self, base, revinfo, fh):
         revlog = self.revlog
         t = self.buildtext(revinfo, fh)
         if revlog.iscensored(base):
             # deltas based on a censored revision must replace the
             # full content in one patch, so delta works everywhere
             header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
             delta = header + t
         else:
             ptext = revlog.revision(base, _df=fh, raw=True)
             delta = mdiff.textdiff(ptext, t)
 
         return delta
 
     def _builddeltainfo(self, revinfo, base, fh):
         # can we use the cached delta?
         if revinfo.cachedelta and revinfo.cachedelta[0] == base:
             delta = revinfo.cachedelta[1]
         else:
             delta = self._builddeltadiff(base, revinfo, fh)
         revlog = self.revlog
         header, data = revlog.compress(delta)
         deltalen = len(header) + len(data)
         chainbase = revlog.chainbase(base)
         offset = revlog.end(len(revlog) - 1)
         dist = deltalen + offset - revlog.start(chainbase)
         if revlog._generaldelta:
             deltabase = base
         else:
             deltabase = chainbase
         chainlen, compresseddeltalen = revlog._chaininfo(base)
         chainlen += 1
         compresseddeltalen += deltalen
         return _deltainfo(dist, deltalen, (header, data), deltabase,
                           chainbase, chainlen, compresseddeltalen)
 
     def finddeltainfo(self, revinfo, fh):
         """Find an acceptable delta against a candidate revision
 
         revinfo: information about the revision (instance of _revisioninfo)
         fh:      file handle to either the .i or the .d revlog file,
                  depending on whether it is inlined or not
 
         Returns the first acceptable candidate revision, as ordered by
         _getcandidaterevs
         """
         cachedelta = revinfo.cachedelta
         p1 = revinfo.p1
         p2 = revinfo.p2
         revlog = self.revlog
 
         deltainfo = None
         for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
             nominateddeltas = []
             for candidaterev in candidaterevs:
                 # no delta for rawtext-changing revs (see "candelta" for why)
                 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                     continue
                 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                 if revlog._isgooddeltainfo(candidatedelta, revinfo):
                     nominateddeltas.append(candidatedelta)
             if nominateddeltas:
                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                 break
 
         return deltainfo
 
 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node:       expected hash of the revision
     p1, p2:     parent revs of the revision
     btext:      built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags:      flags associated to the revision storage
 
     One of btext[0] or cachedelta must be set.
     """
     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()
 
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack
 
 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size
 
     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1
 
         # add the magic null revision at -1
         index.append((0, 0, 0, -1, -1, -1, -1, nullid))
 
         return index, nodemap, None
 
     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise RevlogError(_('index entry flags need revlog version 1'))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return indexformatv0_pack(*e2)
 
 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7fffffff
 
 class revlogio(object):
     def __init__(self):
         self.size = indexformatng.size
 
     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, getattr(index, 'nodemap', None), cache
 
     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
         if rev == 0:
             p = versionformat_pack(version) + p[4:]
         return p
 
564 class revlog(object):
568 class revlog(object):
565 """
569 """
566 the underlying revision storage object
570 the underlying revision storage object
567
571
568 A revlog consists of two parts, an index and the revision data.
572 A revlog consists of two parts, an index and the revision data.
569
573
570 The index is a file with a fixed record size containing
574 The index is a file with a fixed record size containing
571 information on each revision, including its nodeid (hash), the
575 information on each revision, including its nodeid (hash), the
572 nodeids of its parents, the position and offset of its data within
576 nodeids of its parents, the position and offset of its data within
573 the data file, and the revision it's based on. Finally, each entry
577 the data file, and the revision it's based on. Finally, each entry
574 contains a linkrev entry that can serve as a pointer to external
578 contains a linkrev entry that can serve as a pointer to external
575 data.
579 data.
576
580
577 The revision data itself is a linear collection of data chunks.
581 The revision data itself is a linear collection of data chunks.
578 Each chunk represents a revision and is usually represented as a
582 Each chunk represents a revision and is usually represented as a
579 delta against the previous chunk. To bound lookup time, runs of
583 delta against the previous chunk. To bound lookup time, runs of
580 deltas are limited to about 2 times the length of the original
584 deltas are limited to about 2 times the length of the original
581 version data. This makes retrieval of a version proportional to
585 version data. This makes retrieval of a version proportional to
582 its size, or O(1) relative to the number of revisions.
586 its size, or O(1) relative to the number of revisions.
583
587
584 Both pieces of the revlog are written to in an append-only
588 Both pieces of the revlog are written to in an append-only
585 fashion, which means we never need to rewrite a file to insert or
589 fashion, which means we never need to rewrite a file to insert or
586 remove data, and can use some simple techniques to avoid the need
590 remove data, and can use some simple techniques to avoid the need
587 for locking while reading.
591 for locking while reading.
588
592
589 If checkambig, indexfile is opened with checkambig=True at
593 If checkambig, indexfile is opened with checkambig=True at
590 writing, to avoid file stat ambiguity.
594 writing, to avoid file stat ambiguity.
591
595
592 If mmaplargeindex is True, and an mmapindexthreshold is set, the
596 If mmaplargeindex is True, and an mmapindexthreshold is set, the
593 index will be mmapped rather than read if it is larger than the
597 index will be mmapped rather than read if it is larger than the
594 configured threshold.
598 configured threshold.
595
599
596 If censorable is True, the revlog can have censored revisions.
600 If censorable is True, the revlog can have censored revisions.
597 """
601 """
598 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
602 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
599 mmaplargeindex=False, censorable=False):
603 mmaplargeindex=False, censorable=False):
600 """
604 """
601 create a revlog object
605 create a revlog object
602
606
603 opener is a function that abstracts the file opening operation
607 opener is a function that abstracts the file opening operation
604 and can be used to implement COW semantics or the like.
608 and can be used to implement COW semantics or the like.
605 """
609 """
606 self.indexfile = indexfile
610 self.indexfile = indexfile
607 self.datafile = datafile or (indexfile[:-2] + ".d")
611 self.datafile = datafile or (indexfile[:-2] + ".d")
608 self.opener = opener
612 self.opener = opener
609 # When True, indexfile is opened with checkambig=True at writing, to
613 # When True, indexfile is opened with checkambig=True at writing, to
610 # avoid file stat ambiguity.
614 # avoid file stat ambiguity.
611 self._checkambig = checkambig
615 self._checkambig = checkambig
612 self._censorable = censorable
616 self._censorable = censorable
613 # 3-tuple of (node, rev, text) for a raw revision.
617 # 3-tuple of (node, rev, text) for a raw revision.
614 self._cache = None
618 self._cache = None
615 # Maps rev to chain base rev.
619 # Maps rev to chain base rev.
616 self._chainbasecache = util.lrucachedict(100)
620 self._chainbasecache = util.lrucachedict(100)
617 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
621 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
618 self._chunkcache = (0, '')
622 self._chunkcache = (0, '')
619 # How much data to read and cache into the raw revlog data cache.
623 # How much data to read and cache into the raw revlog data cache.
620 self._chunkcachesize = 65536
624 self._chunkcachesize = 65536
621 self._maxchainlen = None
625 self._maxchainlen = None
622 self._aggressivemergedeltas = True
626 self._aggressivemergedeltas = True
623 self.index = []
627 self.index = []
624 # Mapping of partial identifiers to full nodes.
628 # Mapping of partial identifiers to full nodes.
625 self._pcache = {}
629 self._pcache = {}
626 # Mapping of revision integer to full node.
630 # Mapping of revision integer to full node.
627 self._nodecache = {nullid: nullrev}
631 self._nodecache = {nullid: nullrev}
628 self._nodepos = None
632 self._nodepos = None
629 self._compengine = 'zlib'
633 self._compengine = 'zlib'
630 self._maxdeltachainspan = -1
634 self._maxdeltachainspan = -1
631 self._withsparseread = False
635 self._withsparseread = False
632 self._srdensitythreshold = 0.50
636 self._srdensitythreshold = 0.50
633 self._srmingapsize = 262144
637 self._srmingapsize = 262144
634
638
635 mmapindexthreshold = None
639 mmapindexthreshold = None
636 v = REVLOG_DEFAULT_VERSION
640 v = REVLOG_DEFAULT_VERSION
637 opts = getattr(opener, 'options', None)
641 opts = getattr(opener, 'options', None)
638 if opts is not None:
642 if opts is not None:
639 if 'revlogv2' in opts:
643 if 'revlogv2' in opts:
640 # version 2 revlogs always use generaldelta.
644 # version 2 revlogs always use generaldelta.
641 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
645 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
642 elif 'revlogv1' in opts:
646 elif 'revlogv1' in opts:
643 if 'generaldelta' in opts:
647 if 'generaldelta' in opts:
644 v |= FLAG_GENERALDELTA
648 v |= FLAG_GENERALDELTA
645 else:
649 else:
646 v = 0
650 v = 0
647 if 'chunkcachesize' in opts:
651 if 'chunkcachesize' in opts:
648 self._chunkcachesize = opts['chunkcachesize']
652 self._chunkcachesize = opts['chunkcachesize']
649 if 'maxchainlen' in opts:
653 if 'maxchainlen' in opts:
650 self._maxchainlen = opts['maxchainlen']
654 self._maxchainlen = opts['maxchainlen']
651 if 'aggressivemergedeltas' in opts:
655 if 'aggressivemergedeltas' in opts:
652 self._aggressivemergedeltas = opts['aggressivemergedeltas']
656 self._aggressivemergedeltas = opts['aggressivemergedeltas']
653 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
657 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
654 if 'compengine' in opts:
658 if 'compengine' in opts:
655 self._compengine = opts['compengine']
659 self._compengine = opts['compengine']
656 if 'maxdeltachainspan' in opts:
660 if 'maxdeltachainspan' in opts:
657 self._maxdeltachainspan = opts['maxdeltachainspan']
661 self._maxdeltachainspan = opts['maxdeltachainspan']
658 if mmaplargeindex and 'mmapindexthreshold' in opts:
662 if mmaplargeindex and 'mmapindexthreshold' in opts:
659 mmapindexthreshold = opts['mmapindexthreshold']
663 mmapindexthreshold = opts['mmapindexthreshold']
660 self._withsparseread = bool(opts.get('with-sparse-read', False))
664 self._withsparseread = bool(opts.get('with-sparse-read', False))
661 if 'sparse-read-density-threshold' in opts:
665 if 'sparse-read-density-threshold' in opts:
662 self._srdensitythreshold = opts['sparse-read-density-threshold']
666 self._srdensitythreshold = opts['sparse-read-density-threshold']
663 if 'sparse-read-min-gap-size' in opts:
667 if 'sparse-read-min-gap-size' in opts:
664 self._srmingapsize = opts['sparse-read-min-gap-size']
668 self._srmingapsize = opts['sparse-read-min-gap-size']
665
669
666 if self._chunkcachesize <= 0:
670 if self._chunkcachesize <= 0:
667 raise RevlogError(_('revlog chunk cache size %r is not greater '
671 raise RevlogError(_('revlog chunk cache size %r is not greater '
668 'than 0') % self._chunkcachesize)
672 'than 0') % self._chunkcachesize)
669 elif self._chunkcachesize & (self._chunkcachesize - 1):
673 elif self._chunkcachesize & (self._chunkcachesize - 1):
670 raise RevlogError(_('revlog chunk cache size %r is not a power '
674 raise RevlogError(_('revlog chunk cache size %r is not a power '
671 'of 2') % self._chunkcachesize)
675 'of 2') % self._chunkcachesize)
672
676
673 indexdata = ''
677 indexdata = ''
674 self._initempty = True
678 self._initempty = True
675 try:
679 try:
676 with self._indexfp() as f:
680 with self._indexfp() as f:
677 if (mmapindexthreshold is not None and
681 if (mmapindexthreshold is not None and
678 self.opener.fstat(f).st_size >= mmapindexthreshold):
682 self.opener.fstat(f).st_size >= mmapindexthreshold):
679 indexdata = util.buffer(util.mmapread(f))
683 indexdata = util.buffer(util.mmapread(f))
680 else:
684 else:
681 indexdata = f.read()
685 indexdata = f.read()
682 if len(indexdata) > 0:
686 if len(indexdata) > 0:
683 v = versionformat_unpack(indexdata[:4])[0]
687 v = versionformat_unpack(indexdata[:4])[0]
684 self._initempty = False
688 self._initempty = False
685 except IOError as inst:
689 except IOError as inst:
686 if inst.errno != errno.ENOENT:
690 if inst.errno != errno.ENOENT:
687 raise
691 raise
688
692
689 self.version = v
693 self.version = v
690 self._inline = v & FLAG_INLINE_DATA
694 self._inline = v & FLAG_INLINE_DATA
691 self._generaldelta = v & FLAG_GENERALDELTA
695 self._generaldelta = v & FLAG_GENERALDELTA
692 flags = v & ~0xFFFF
696 flags = v & ~0xFFFF
693 fmt = v & 0xFFFF
697 fmt = v & 0xFFFF
694 if fmt == REVLOGV0:
698 if fmt == REVLOGV0:
695 if flags:
699 if flags:
696 raise RevlogError(_('unknown flags (%#04x) in version %d '
700 raise RevlogError(_('unknown flags (%#04x) in version %d '
697 'revlog %s') %
701 'revlog %s') %
698 (flags >> 16, fmt, self.indexfile))
702 (flags >> 16, fmt, self.indexfile))
699 elif fmt == REVLOGV1:
703 elif fmt == REVLOGV1:
700 if flags & ~REVLOGV1_FLAGS:
704 if flags & ~REVLOGV1_FLAGS:
701 raise RevlogError(_('unknown flags (%#04x) in version %d '
705 raise RevlogError(_('unknown flags (%#04x) in version %d '
702 'revlog %s') %
706 'revlog %s') %
703 (flags >> 16, fmt, self.indexfile))
707 (flags >> 16, fmt, self.indexfile))
704 elif fmt == REVLOGV2:
708 elif fmt == REVLOGV2:
705 if flags & ~REVLOGV2_FLAGS:
709 if flags & ~REVLOGV2_FLAGS:
706 raise RevlogError(_('unknown flags (%#04x) in version %d '
710 raise RevlogError(_('unknown flags (%#04x) in version %d '
707 'revlog %s') %
711 'revlog %s') %
708 (flags >> 16, fmt, self.indexfile))
712 (flags >> 16, fmt, self.indexfile))
709 else:
713 else:
710 raise RevlogError(_('unknown version (%d) in revlog %s') %
714 raise RevlogError(_('unknown version (%d) in revlog %s') %
711 (fmt, self.indexfile))
715 (fmt, self.indexfile))
712
716
713 self.storedeltachains = True
717 self.storedeltachains = True
714
718
715 self._io = revlogio()
719 self._io = revlogio()
716 if self.version == REVLOGV0:
720 if self.version == REVLOGV0:
717 self._io = revlogoldio()
721 self._io = revlogoldio()
718 try:
722 try:
719 d = self._io.parseindex(indexdata, self._inline)
723 d = self._io.parseindex(indexdata, self._inline)
720 except (ValueError, IndexError):
724 except (ValueError, IndexError):
721 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
725 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
722 self.index, nodemap, self._chunkcache = d
726 self.index, nodemap, self._chunkcache = d
723 if nodemap is not None:
727 if nodemap is not None:
724 self.nodemap = self._nodecache = nodemap
728 self.nodemap = self._nodecache = nodemap
725 if not self._chunkcache:
729 if not self._chunkcache:
726 self._chunkclear()
730 self._chunkclear()
727 # revnum -> (chain-length, sum-delta-length)
731 # revnum -> (chain-length, sum-delta-length)
728 self._chaininfocache = {}
732 self._chaininfocache = {}
729 # revlog header -> revlog compressor
733 # revlog header -> revlog compressor
730 self._decompressors = {}
734 self._decompressors = {}
731
735
732 @util.propertycache
736 @util.propertycache
733 def _compressor(self):
737 def _compressor(self):
734 return util.compengines[self._compengine].revlogcompressor()
738 return util.compengines[self._compengine].revlogcompressor()
735
739
736 def _indexfp(self, mode='r'):
740 def _indexfp(self, mode='r'):
737 """file object for the revlog's index file"""
741 """file object for the revlog's index file"""
738 args = {r'mode': mode}
742 args = {r'mode': mode}
739 if mode != 'r':
743 if mode != 'r':
740 args[r'checkambig'] = self._checkambig
744 args[r'checkambig'] = self._checkambig
741 if mode == 'w':
745 if mode == 'w':
742 args[r'atomictemp'] = True
746 args[r'atomictemp'] = True
743 return self.opener(self.indexfile, **args)
747 return self.opener(self.indexfile, **args)
744
748
745 def _datafp(self, mode='r'):
749 def _datafp(self, mode='r'):
746 """file object for the revlog's data file"""
750 """file object for the revlog's data file"""
747 return self.opener(self.datafile, mode=mode)
751 return self.opener(self.datafile, mode=mode)
748
752
749 @contextlib.contextmanager
753 @contextlib.contextmanager
750 def _datareadfp(self, existingfp=None):
754 def _datareadfp(self, existingfp=None):
751 """file object suitable to read data"""
755 """file object suitable to read data"""
752 if existingfp is not None:
756 if existingfp is not None:
753 yield existingfp
757 yield existingfp
754 else:
758 else:
755 if self._inline:
759 if self._inline:
756 func = self._indexfp
760 func = self._indexfp
757 else:
761 else:
758 func = self._datafp
762 func = self._datafp
759 with func() as fp:
763 with func() as fp:
760 yield fp
764 yield fp
761
765
762 def tip(self):
766 def tip(self):
763 return self.node(len(self.index) - 2)
767 return self.node(len(self.index) - 2)
764 def __contains__(self, rev):
768 def __contains__(self, rev):
765 return 0 <= rev < len(self)
769 return 0 <= rev < len(self)
766 def __len__(self):
770 def __len__(self):
767 return len(self.index) - 1
771 return len(self.index) - 1
768 def __iter__(self):
772 def __iter__(self):
769 return iter(xrange(len(self)))
773 return iter(xrange(len(self)))
770 def revs(self, start=0, stop=None):
774 def revs(self, start=0, stop=None):
771 """iterate over all rev in this revlog (from start to stop)"""
775 """iterate over all rev in this revlog (from start to stop)"""
772 step = 1
776 step = 1
773 if stop is not None:
777 if stop is not None:
774 if start > stop:
778 if start > stop:
775 step = -1
779 step = -1
776 stop += step
780 stop += step
777 else:
781 else:
778 stop = len(self)
782 stop = len(self)
779 return xrange(start, stop, step)
783 return xrange(start, stop, step)
780
784
781 @util.propertycache
785 @util.propertycache
782 def nodemap(self):
786 def nodemap(self):
783 self.rev(self.node(0))
787 self.rev(self.node(0))
784 return self._nodecache
788 return self._nodecache
785
789
786 def hasnode(self, node):
790 def hasnode(self, node):
787 try:
791 try:
788 self.rev(node)
792 self.rev(node)
789 return True
793 return True
790 except KeyError:
794 except KeyError:
791 return False
795 return False
792
796
793 def candelta(self, baserev, rev):
797 def candelta(self, baserev, rev):
794 """whether two revisions (baserev, rev) can be delta-ed or not"""
798 """whether two revisions (baserev, rev) can be delta-ed or not"""
795 # Disable delta if either rev requires a content-changing flag
799 # Disable delta if either rev requires a content-changing flag
796 # processor (ex. LFS). This is because such flag processor can alter
800 # processor (ex. LFS). This is because such flag processor can alter
797 # the rawtext content that the delta will be based on, and two clients
801 # the rawtext content that the delta will be based on, and two clients
798 # could have a same revlog node with different flags (i.e. different
802 # could have a same revlog node with different flags (i.e. different
799 # rawtext contents) and the delta could be incompatible.
803 # rawtext contents) and the delta could be incompatible.
800 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
804 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
801 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
805 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
802 return False
806 return False
803 return True
807 return True
804
808
805 def clearcaches(self):
809 def clearcaches(self):
806 self._cache = None
810 self._cache = None
807 self._chainbasecache.clear()
811 self._chainbasecache.clear()
808 self._chunkcache = (0, '')
812 self._chunkcache = (0, '')
809 self._pcache = {}
813 self._pcache = {}
810
814
811 try:
815 try:
812 self._nodecache.clearcaches()
816 self._nodecache.clearcaches()
813 except AttributeError:
817 except AttributeError:
814 self._nodecache = {nullid: nullrev}
818 self._nodecache = {nullid: nullrev}
815 self._nodepos = None
819 self._nodepos = None
816
820
817 def rev(self, node):
821 def rev(self, node):
818 try:
822 try:
819 return self._nodecache[node]
823 return self._nodecache[node]
820 except TypeError:
824 except TypeError:
821 raise
825 raise
822 except RevlogError:
826 except RevlogError:
823 # parsers.c radix tree lookup failed
827 # parsers.c radix tree lookup failed
824 if node == wdirid or node in wdirfilenodeids:
828 if node == wdirid or node in wdirfilenodeids:
825 raise error.WdirUnsupported
829 raise error.WdirUnsupported
826 raise LookupError(node, self.indexfile, _('no node'))
830 raise LookupError(node, self.indexfile, _('no node'))
827 except KeyError:
831 except KeyError:
828 # pure python cache lookup failed
832 # pure python cache lookup failed
829 n = self._nodecache
833 n = self._nodecache
830 i = self.index
834 i = self.index
831 p = self._nodepos
835 p = self._nodepos
832 if p is None:
836 if p is None:
833 p = len(i) - 2
837 p = len(i) - 2
834 else:
838 else:
835 assert p < len(i)
839 assert p < len(i)
836 for r in xrange(p, -1, -1):
840 for r in xrange(p, -1, -1):
837 v = i[r][7]
841 v = i[r][7]
838 n[v] = r
842 n[v] = r
839 if v == node:
843 if v == node:
840 self._nodepos = r - 1
844 self._nodepos = r - 1
841 return r
845 return r
842 if node == wdirid or node in wdirfilenodeids:
846 if node == wdirid or node in wdirfilenodeids:
843 raise error.WdirUnsupported
847 raise error.WdirUnsupported
844 raise LookupError(node, self.indexfile, _('no node'))
848 raise LookupError(node, self.indexfile, _('no node'))
845
849
846 # Accessors for index entries.
850 # Accessors for index entries.
847
851
848 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
852 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
849 # are flags.
853 # are flags.
850 def start(self, rev):
854 def start(self, rev):
851 return int(self.index[rev][0] >> 16)
855 return int(self.index[rev][0] >> 16)
852
856
853 def flags(self, rev):
857 def flags(self, rev):
854 return self.index[rev][0] & 0xFFFF
858 return self.index[rev][0] & 0xFFFF
855
859
856 def length(self, rev):
860 def length(self, rev):
857 return self.index[rev][1]
861 return self.index[rev][1]
858
862
859 def rawsize(self, rev):
863 def rawsize(self, rev):
860 """return the length of the uncompressed text for a given revision"""
864 """return the length of the uncompressed text for a given revision"""
861 l = self.index[rev][2]
865 l = self.index[rev][2]
862 if l >= 0:
866 if l >= 0:
863 return l
867 return l
864
868
865 t = self.revision(rev, raw=True)
869 t = self.revision(rev, raw=True)
866 return len(t)
870 return len(t)
867
871
868 def size(self, rev):
872 def size(self, rev):
869 """length of non-raw text (processed by a "read" flag processor)"""
873 """length of non-raw text (processed by a "read" flag processor)"""
870 # fast path: if no "read" flag processor could change the content,
874 # fast path: if no "read" flag processor could change the content,
871 # size is rawsize. note: ELLIPSIS is known to not change the content.
875 # size is rawsize. note: ELLIPSIS is known to not change the content.
872 flags = self.flags(rev)
876 flags = self.flags(rev)
873 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
877 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
874 return self.rawsize(rev)
878 return self.rawsize(rev)
875
879
876 return len(self.revision(rev, raw=False))
880 return len(self.revision(rev, raw=False))
877
881
878 def chainbase(self, rev):
882 def chainbase(self, rev):
879 base = self._chainbasecache.get(rev)
883 base = self._chainbasecache.get(rev)
880 if base is not None:
884 if base is not None:
881 return base
885 return base
882
886
883 index = self.index
887 index = self.index
884 iterrev = rev
888 iterrev = rev
885 base = index[iterrev][3]
889 base = index[iterrev][3]
886 while base != iterrev:
890 while base != iterrev:
887 iterrev = base
891 iterrev = base
888 base = index[iterrev][3]
892 base = index[iterrev][3]
889
893
890 self._chainbasecache[rev] = base
894 self._chainbasecache[rev] = base
891 return base
895 return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
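
    # Illustrative sketch (not part of the original module): how a delta
    # chain is walked. Each index entry stores its delta base in field 3;
    # with general delta the base can be any earlier revision, otherwise it
    # is implicitly ``rev - 1``. A chain terminates when a revision is its
    # own base (a full snapshot). Assuming general delta and hypothetical
    # bases {0: 0, 1: 0, 2: 1}, the pure-python walk above yields:
    #
    #   rl._deltachain(2)             # -> ([0, 1, 2], False)
    #   rl._deltachain(2, stoprev=1)  # -> ([2], True)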

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
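
    # Illustrative sketch (not part of the original module): what the
    # "missing" computation means on a small linear history 0 <- 1 <- 2,
    # assuming ``rl`` is a revlog and n0/n1/n2 the corresponding nodes:
    #
    #   rl.findmissingrevs(common=[0], heads=[2])  # -> [1, 2]
    #   rl.findmissing(common=[n2], heads=[n2])    # -> []
    #
    # i.e. everything reachable from the heads minus everything reachable
    # from common, in topological (revision-number) order.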

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]
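
    # Illustrative sketch (not part of the original module): on a history
    # where revision 0 has two children 1 and 2 (both heads), assuming
    # ``rl`` is a revlog and n0/n1/n2 the corresponding nodes:
    #
    #   rl.heads()           # -> [n1, n2]
    #   rl.heads(start=n1)   # -> [n1]  (only heads that descend from n1)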

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        """True if revision 'end' is a descendant of revision 'start'

        A revision is considered a descendant of itself."""
        if start == nullrev:
            return True
        elif start == end:
            return True
        return start in self._commonancestorsheads(start, end)

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        a, b = self.rev(a), self.rev(b)
        return self.descendant(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))
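
    # Illustrative sketch (not part of the original module): the resolution
    # order implemented by lookup(). Assuming ``rl`` is a revlog whose tip is
    # revision 10 with hex node "abc123..." (hypothetical values):
    #
    #   rl.lookup(10)          # exact revision number, via _match()
    #   rl.lookup('10')        # str(revision number), via _match()
    #   rl.lookup('abc123')    # unambiguous hex prefix, via _partialmatch()
    #
    # An ambiguous prefix or an unknown id raises LookupError.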

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
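
    # Illustrative note (not part of the original module): with only one
    # node starting with "ab" in the index, shortest() would return "ab" for
    # minlength=2, while a second node sharing that prefix would push the
    # answer out to the first hex digit that distinguishes them (hypothetical
    # values). Prefixes consisting solely of "f" are also lengthened so they
    # can never be confused with the working-directory pseudo-id.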

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
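
    # Illustrative sketch (not part of the original module): the power-of-two
    # window alignment above. With a (hypothetical) cachesize of 65536,
    # offset=70000 and length=100:
    #
    #   realoffset = 70000 & ~65535                        # -> 65536
    #   reallength = ((70100 + 65536) & ~65535) - 65536    # -> 65536
    #
    # so the read covers [65536, 131072) and the requested 100 bytes come
    # back as a buffer slice starting 70000 - 65536 bytes into that block.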

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
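
    # Illustrative sketch (not part of the original module): the inline
    # adjustment above. In an inline revlog the data of revision r is
    # preceded by r + 1 fixed-size index entries, so with a (hypothetical)
    # entry size of 64 bytes, a revision whose data-offset is 500 and whose
    # revision number is 3 really starts at:
    #
    #   500 + (3 + 1) * 64   # -> 756 bytes into the .i file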

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
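
    # Illustrative sketch (not part of the original module): why batching
    # helps. For revs [5, 6, 7] sharing one on-disk span, _chunks() issues a
    # single segment read and then slices per-revision chunks out of it,
    # e.g. for the non-inline case:
    #
    #   offset, data = rl._getsegmentforrevs(5, 7)
    #   chunk6 = rl.decompress(util.buffer(data, rl.start(6) - offset,
    #                                      rl.length(6)))
    #
    # versus three separate reads with rl._chunk(5), rl._chunk(6) and
    # rl._chunk(7); inline revlogs also add the index-entry offsets as above.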

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
1834
1838
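    # Illustrative sketch, not part of revlog.py: the two flavors of
    # revision(). 'rl' is an assumed revlog instance and 'node' a known
    # nodeid:
    #
    #   text = rl.revision(node)               # flag transforms applied,
    #                                          # hash verified when required
    #   rawtext = rl.revision(node, raw=True)  # stored rawtext, transforms
    #                                          # skipped (changegroups, debug)
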
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

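    # Illustrative sketch, not part of revlog.py: the shape of a registered
    # flag processor, as unpacked above. Each _flagprocessors entry maps a
    # flag to a (readtransform, writetransform, rawtransform) triple; the
    # names below are hypothetical identity transforms:
    #
    #   def _readident(rl, text):
    #       return text, True      # (transformed text, validatehash)
    #   def _writeident(rl, text):
    #       return text, True
    #   def _rawident(rl, text):
    #       return True            # rawtext is hash-checkable as-is
    #
    #   _flagprocessors[SOME_FLAG] = (_readident, _writeident, _rawident)
    #
    # ('SOME_FLAG' stands in for a flag declared in REVIDX_FLAGS_ORDER.)
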
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

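    # Illustrative sketch, not part of revlog.py: a minimal addrevision()
    # call. 'rl' is an assumed revlog, 'tr' an open transaction, 'linkrev'
    # the changelog revision this entry should point at, and p1node/p2node
    # the parent nodeids (nullid for a root revision):
    #
    #   node = rl.addrevision("file contents\n", tr, linkrev, p1node, p2node)
    #
    # The nodeid is computed as hash(text, p1, p2) unless 'node' is passed.
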
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

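    # Editor's note: compress() and decompress() agree on a one-byte header
    # convention, read straight from the code above:
    #
    #   'u'   - uncompressed data follows (the header is stripped on read)
    #   '\0'  - uncompressed data that already starts with '\0', stored as-is
    #   'x'   - zlib stream (zlib output happens to start with 'x')
    #   other - the byte is looked up as a header in util.compengines
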
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = deltainfo.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
            deltainfo.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
            return False

        return True

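    # Editor's note: a worked example of the bounds above. For a revision
    # whose full text is 1000 bytes (textlen), with _maxdeltachainspan set
    # to 3000, a candidate delta is kept only if all of these hold:
    #
    #   deltainfo.distance           <= 4000   # max(3000, textlen * 4)
    #   deltainfo.deltalen           <= 1000   # textlen
    #   deltainfo.compresseddeltalen <= 2000   # textlen * 2
    #   deltainfo.chainlen           <= self._maxchainlen   # if configured
    #
    # When _maxdeltachainspan is unset (falsy), the distance check is
    # effectively disabled by seeding maxdist with the delta's own distance.
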
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # we need the rawtext size before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

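    # Illustrative note, not part of revlog.py: the shape of one entry in
    # the 'deltas' iterable consumed above. Each item is a 7-tuple:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where 'delta' is a binary patch against the revision identified by
    # 'deltabase', 'linknode' is mapped to a local linkrev through
    # linkmapper(linknode), and a falsy 'flags' is replaced by
    # REVIDX_DEFAULT_FLAGS.
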
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

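    # Editor's note (a sketch of the layout, not a normative spec): the
    # heuristic above keys off the filelog metadata envelope. A censored
    # rawtext looks roughly like:
    #
    #   "\1\ncensored: <tombstone>\1\n..."
    #
    # so a full-replacement delta carrying it starts, right after the
    # ">lll" patch header, with the literal bytes "\1\ncensored:".
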
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

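    # Illustrative sketch, not part of revlog.py: typical use during a
    # strip. 'rl' is an assumed revlog and 'minlink' the first changelog
    # rev being removed:
    #
    #   strippoint, brokenrevs = rl.getstrippoint(minlink)
    #   # revs >= strippoint are truncated by rl.strip(minlink, tr);
    #   # 'brokenrevs' are the revs among them whose linkrev is still below
    #   # minlink, i.e. whose linkrevs this strip will break.
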
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If it is not set, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
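
    # Illustrative sketch, not part of revlog.py: recomputing every delta
    # while copying into a fresh revlog, e.g. after a delta algorithm
    # change. 'src' and 'dest' are assumed revlogs (dest empty) and 'tr'
    # an open transaction:
    #
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)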