revlog: also detect intermediate snapshots...
Paul Morelle
r39186:f39efa88 default
@@ -1,3030 +1,3033 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
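
# Illustrative sketch (not part of the upstream module): round trip between
# packmeta() and parsemeta() on the "\1\n"-delimited metadata block that is
# stored in front of a filelog text. The helper is for demonstration only.
def _example_metaroundtrip():
    packed = packmeta({'copy': 'a', 'copyrev': 'deadbeef'}, 'file content\n')
    meta, size = parsemeta(packed)
    assert meta == {'copy': 'a', 'copyrev': 'deadbeef'}
    assert packed[size:] == 'file content\n'  # payload follows the metadata
    return meta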

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, then the "rawtext" returned by "write" should be
      used to generate the hash. Usually, "write" and "read" return different
      booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
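
# Illustrative sketch (hypothetical, not part of the upstream module): a
# minimal (read, write, raw) processor tuple following the contract described
# in addflagprocessor() above. Calling this helper would mutate the global
# _flagprocessors table, so it is shown for demonstration only.
def _example_registerprocessor():
    def _read(self, rawtext):
        return rawtext, True   # rawtext unchanged; safe for hash checking
    def _write(self, text):
        return text, True      # stored rawtext is the user-visible text
    def _raw(self, rawtext):
        return True            # same boolean as _write
    addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))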

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
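
# Illustrative sketch (not part of the upstream module): the index packs a
# revision's data offset and its 16-bit flag field into one integer, offset
# in the high bits and flags in the low 16 bits.
def _example_offsettype():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED
    return packed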

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
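
# Illustrative sketch (not part of the upstream module): for a root revision
# both parents are nullid, so the node ends up being
# sha1(nullid + nullid + text); parents are always hashed before the text.
def _example_rootnode():
    text = 'hello\n'
    node = hash(text, nullid, nullid)
    assert node == hashlib.sha1(nullid + nullid + text).digest()
    return node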

class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
                and endidx > startidx
                and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read at once.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read at once.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are worth being diffed against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need deltas

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that will never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
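
# Illustrative sketch (not part of the upstream module): a v0 index entry is
# a fixed 76-byte record (four signed 32-bit integers followed by three
# 20-byte nodeids), so entries can be sliced out of the index by offset.
def _example_v0entry():
    assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per record
    record = indexformatv0_pack(0, 0, 0, 0, nullid, nullid, nullid)
    offset, clen, baserev, linkrev, p1, p2, node = indexformatv0_unpack(record)
    return (offset, clen, baserev, linkrev, p1, p2, node)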

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
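
# Illustrative sketch (an interpretation, not part of the upstream module):
# the revlog version header is not stored separately. packentry() overwrites
# the first 4 bytes of entry 0 (the high half of its offset field, which is
# always zero for the first revision) with the version/flags word.
def _example_versionheader():
    header = versionformat_pack(REVLOG_DEFAULT_VERSION)
    version = versionformat_unpack(header)[0]
    assert version & 0xFFFF == REVLOG_DEFAULT_FORMAT
    assert version & FLAG_INLINE_DATA
    return version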

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)
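        # Illustrative note (not part of the original module): the check
        # above relies on the standard bit trick that a positive integer x
        # is a power of two exactly when x & (x - 1) == 0, e.g.:
        #
        #   65536 & 65535 == 0      # power of two, accepted
        #   65537 & 65536 == 65536  # not a power of two, rejected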

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
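    # Illustrative sketch (not part of the original module; rl is a
    # hypothetical revlog instance): revs() walks forward by default and
    # flips direction when start > stop, e.g. for a revlog with 5 revisions:
    #
    #   list(rl.revs())                 # [0, 1, 2, 3, 4]
    #   list(rl.revs(start=1, stop=3))  # [1, 2, 3] (stop is inclusive)
    #   list(rl.revs(start=3, stop=1))  # [3, 2, 1] (reverse iteration)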

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
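    # Illustrative sketch (not part of the original module): start() and
    # flags() split the packed first tuple entry described above, e.g.:
    #
    #   packed = (4096 << 16) | REVIDX_ISCENSORED
    #   packed >> 16      # 4096, the data offset returned by start()
    #   packed & 0xFFFF   # the flag bits returned by flags()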

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base
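    # Illustrative sketch (not part of the original module): index[rev][3]
    # stores the base of each delta, so the loop above chases base pointers
    # until it reaches a revision that is its own base (a full snapshot).
    # With hypothetical bases 5 -> 3 -> 2 -> 2, chainbase(5) walks
    #
    #   base(5) == 3, base(3) == 2, base(2) == 2   ->  returns 2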

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
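    # Illustrative sketch (not part of the original module; rl is a
    # hypothetical generaldelta revlog whose base pointers index[r][3]
    # give 7 -> 4 -> 2 -> 2):
    #
    #   rl._deltachain(7)             # ([2, 4, 7], False): full chain
    #   rl._deltachain(7, stoprev=4)  # ([7], True): stopped before rev 4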

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
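    # Illustrative sketch (not part of the original module; rl is a
    # hypothetical revlog): for a graph where rev 0 is the parent of revs 1
    # and 3, and rev 1 is the parent of rev 2, every rev is first marked as
    # a candidate head and then knocked out once it is seen as a parent.
    # The extra slot at the end of ishead absorbs nullrev (-1) parents.
    #
    #   rl._headrevs()   # [2, 3]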

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
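    # Illustrative note (not part of the original module): the a > b early
    # return above is safe because revision numbers are topologically
    # ordered, so an ancestor always has a smaller rev than its descendant.
    # The remaining case reduces to a membership test; if a is an ancestor
    # of b, the common ancestors of a and b are exactly the ancestors of a,
    # whose single head is a itself, e.g.:
    #
    #   2 in rl._commonancestorsheads(2, 7)  # True iff 2 is an ancestor of 7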
1758
1758
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

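    # Usage sketch (illustrative values, not from the original file): for a
    # revlog ``rl``, each of the following resolves to a full binary nodeid:
    #
    #   rl.lookup(0)            # integer revision number
    #   rl.lookup(b'0')         # str(revision number)
    #   rl.lookup(somenode)     # 20-byte binary nodeid
    #   rl.lookup(b'1e4e1b')    # hypothetical unambiguous hex prefix
    #
    # An ambiguous prefix raises AmbiguousPrefixLookupError; an unknown
    # identifier raises LookupError.
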
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

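    # Illustration (assumed values, not original code): if the cache holds
    # (offset=0, data=b'abcd') and a segment arrives at offset 4, the two
    # are contiguous and the cache grows to (0, b'abcd' + data), provided
    # the combined size stays under _chunksize; otherwise the new segment
    # simply replaces the cached one.
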
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
            self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

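    # Worked example (illustrative, assuming _chunkcachesize = 65536):
    # for offset = 70000 and length = 100, realoffset = 70000 & ~65535
    # = 65536 and reallength = ((70100 + 65536) & ~65535) - 65536 = 65536,
    # so a full 64 KiB window around the request is read and cached, and
    # util.buffer() slices the 100 requested bytes back out of it.
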
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

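    # Illustration (not from the original source): in an inline revlog the
    # index entries and revision data are interleaved in one file, so the
    # data of revision r sits (r + 1) * self._io.size bytes past its logical
    # offset. With 64-byte index entries, revision 2's data is shifted by
    # 3 * 64 = 192 bytes; the adjustment above accounts for exactly that.
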
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

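    # Sketch of the snapshot cases handled above (editorial, not original
    # code): a revision stored as full text (delta parent == nullrev) is a
    # root snapshot; a revision whose delta is taken against one of its
    # parents is an ordinary delta; and a revision whose delta base is some
    # other revision is an intermediate snapshot exactly when that base is
    # itself a snapshot, hence the recursion on the delta parent.
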
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written
        to revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should
        be applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER needs to be stable to allow
        non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update the result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

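    # Contract sketch (inferred from the code above, not original prose):
    # each _flagprocessors entry maps a flag bit to a tuple
    # (readtransform, writetransform, rawtransform). The read and write
    # transforms return (newtext, validatehash); the raw transform returns
    # only the validatehash boolean, since raw data must pass through
    # unmodified (e.g. for changegroup generation).
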
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                        % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash() in
            such a case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any
        # provided cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

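    # Illustration (editorial note, not original code): compress() returns
    # a (header, data) pair. An empty header means the compressed payload is
    # self-identifying (zlib output starts with 'x', for instance); 'u'
    # marks data stored uncompressed; and text already beginning with '\0'
    # is stored verbatim, since decompress() treats that byte as "no
    # compression".
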
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and
        # many chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported
        #    compression engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest
        # reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

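    # Dispatch summary (editorial, restating the code above): the first byte
    # routes the chunk --
    #   'x'  -> zlib-compressed, handed to zlib.decompress
    #   '\0' -> uncompressed data that happens to start with NUL
    #   'u'  -> explicitly uncompressed; the marker byte is stripped
    #   other -> resolved as a registered compression-engine header, with
    #            the decompressor cached in self._decompressors afterwards.
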
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            chunks = _slicechunk(self, deltachain, deltainfo)
            all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
            distance = max(all_span)
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we ignore irrelevant data ranges below a
            # certain size. We also apply that tradeoff here and relax the
            # span constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the
        # delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        # If the sum of the deltas gets larger than K * target text length.
        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        return True

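    # Worked example (illustrative numbers only): with textlen = 1000 and
    # default settings, a candidate delta is rejected when the read span
    # exceeds max(self._maxdeltachainspan, 4 * 1000) bytes, when the delta
    # itself is larger than 1000 bytes, when the cumulated deltas to apply
    # exceed LIMIT_DELTA2TEXT * 1000 bytes, or when the chain would grow
    # past _maxchainlen (if one is configured).
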
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer
        will be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

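    # Index entry reminder (editorial sketch): the 8-tuple appended above is
    # (packed offset+flags, compressed length, full text length, delta base
    # rev, linkrev, p1 rev, p2 rev, nodeid) -- matching the on-disk order of
    # the REVLOGV1 index format.
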
2625 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2628 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2626 # Files opened in a+ mode have inconsistent behavior on various
2629 # Files opened in a+ mode have inconsistent behavior on various
2627 # platforms. Windows requires that a file positioning call be made
2630 # platforms. Windows requires that a file positioning call be made
2628 # when the file handle transitions between reads and writes. See
2631 # when the file handle transitions between reads and writes. See
2629 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2632 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2630 # platforms, Python or the platform itself can be buggy. Some versions
2633 # platforms, Python or the platform itself can be buggy. Some versions
2631 # of Solaris have been observed to not append at the end of the file
2634 # of Solaris have been observed to not append at the end of the file
2632 # if the file was seeked to before the end. See issue4943 for more.
2635 # if the file was seeked to before the end. See issue4943 for more.
2633 #
2636 #
2634 # We work around this issue by inserting a seek() before writing.
2637 # We work around this issue by inserting a seek() before writing.
2635 # Note: This is likely not necessary on Python 3.
2638 # Note: This is likely not necessary on Python 3.
2636 ifh.seek(0, os.SEEK_END)
2639 ifh.seek(0, os.SEEK_END)
2637 if dfh:
2640 if dfh:
2638 dfh.seek(0, os.SEEK_END)
2641 dfh.seek(0, os.SEEK_END)
2639
2642
2640 curr = len(self) - 1
2643 curr = len(self) - 1
2641 if not self._inline:
2644 if not self._inline:
2642 transaction.add(self.datafile, offset)
2645 transaction.add(self.datafile, offset)
2643 transaction.add(self.indexfile, curr * len(entry))
2646 transaction.add(self.indexfile, curr * len(entry))
2644 if data[0]:
2647 if data[0]:
2645 dfh.write(data[0])
2648 dfh.write(data[0])
2646 dfh.write(data[1])
2649 dfh.write(data[1])
2647 ifh.write(entry)
2650 ifh.write(entry)
2648 else:
2651 else:
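            # Inline revlogs interleave each index entry with its data
            # chunk, so the data offset is shifted by the accumulated size
            # of the preceding index entries to get an absolute position.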
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should already be in our log;
        the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
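        # (mdiff.replacediffheader() packs a single ">lll" hunk header of
        # (start=0, end=oldlen, newlen), i.e. one patch replacing the whole
        # base text.)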
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip revisions
        with a linkrev >= minlink

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than or equal to
        # minlink. So we can stop walking.
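        # For example, in a linear revlog whose linkrevs are [0, 2, 2, 3],
        # getstrippoint(2) pops revs 3, 2 and 1, finds each of their
        # linkrevs to be >= 2, and returns (1, set()): revs 1-3 must be
        # stripped and no surviving rev has a broken linkrev.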
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
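            # inline revlogs interleave index entries with data, so the
            # truncation point must also include the index entries of the
            # surviving revisions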
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
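        """check the sizes of the index and data files

        Returns a (dd, di) tuple: dd is the difference between the actual
        and the expected size of the data file, and di the number of
        unexpected bytes in the index file. Both are 0 when the on-disk
        sizes are consistent with the index contents.
        """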
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added through the full ``addrevision()`` path
           rather than the lower-level ``_addrevision()``, recomputing deltas
           as if the content were new.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When ``None``, the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
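                        # cachedelta is (delta base rev, raw delta bytes);
                        # _chunk() returns the revision's on-disk chunk,
                        # which is a delta whenever deltaparent() is not
                        # nullrev.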
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd