revlog: only consider the span of the delta section...
Paul Morelle - r39193:a98e926b default
@@ -1,3068 +1,3073 @@
1 # revlog.py - storage back-end for mercurial
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 """Storage back-end for Mercurial.
9
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
12 """
13
14 from __future__ import absolute_import
15
16 import collections
17 import contextlib
18 import errno
19 import hashlib
20 import heapq
21 import os
22 import re
23 import struct
24 import zlib
25
26 # import stuff from node for others to import from revlog
27 from .node import (
28     bin,
29     hex,
30     nullid,
31     nullrev,
32     wdirfilenodeids,
33     wdirhex,
34     wdirid,
35     wdirrev,
36 )
37 from .i18n import _
38 from .thirdparty import (
39     attr,
40 )
41 from . import (
42     ancestor,
43     error,
44     mdiff,
45     policy,
46     pycompat,
47     templatefilters,
48     util,
49 )
50 from .utils import (
51     stringutil,
52 )
53
54 parsers = policy.importmod(r'parsers')
55
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
58
59 # revlog header flags
60 REVLOGV0 = 0
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
80     REVIDX_ISCENSORED,
81     REVIDX_ELLIPSIS,
82     REVIDX_EXTSTORED,
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmask for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
88 # max size of revlog with inline data
89 _maxinline = 131072
90 _chunksize = 1048576
91
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 CensoredNodeError = error.CensoredNodeError
96 ProgrammingError = error.ProgrammingError
97
98 # Store flag processors (cf. 'addflagprocessor()' to register)
99 _flagprocessors = {
100     REVIDX_ISCENSORED: None,
101 }
102
103 _mdre = re.compile('\1\n')
104 def parsemeta(text):
105     """return (metadatadict, metadatasize)"""
106     # text can be buffer, so we can't use .startswith or .index
107     if text[:2] != '\1\n':
108         return None, None
109     s = _mdre.search(text, 2).start()
110     mtext = text[2:s]
111     meta = {}
112     for l in mtext.splitlines():
113         k, v = l.split(": ", 1)
114         meta[k] = v
115     return meta, (s + 2)
116
117 def packmeta(meta, text):
118     keys = sorted(meta)
119     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120     return "\1\n%s\1\n%s" % (metatext, text)
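# Illustration: a round-trip of the metadata envelope above, doctest-style.
# The sample key/value is hypothetical; only packmeta/parsemeta are used:
#
# >>> packmeta({'copy': 'a'}, 'data')
# '\x01\ncopy: a\n\x01\ndata'
# >>> parsemeta(packmeta({'copy': 'a'}, 'data'))
# ({'copy': 'a'}, 12)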
121
122 def _censoredtext(text):
123     m, offs = parsemeta(text)
124     return m and "censored" in m
125
126 def addflagprocessor(flag, processor):
127     """Register a flag processor on a revision data flag.
128
129     Invariant:
130     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131       and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132     - Only one flag processor can be registered on a specific flag.
133     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134       following signatures:
135           - (read)  f(self, rawtext) -> text, bool
136           - (write) f(self, text) -> rawtext, bool
137           - (raw)   f(self, rawtext) -> bool
138       "text" is presented to the user. "rawtext" is stored in revlog data, not
139       directly visible to the user.
140       The boolean returned by these transforms is used to determine whether
141       the returned text can be used for hash integrity checking. For example,
142       if "write" returns False, then "text" is used to generate the hash. If
143       "write" returns True, that basically means the "rawtext" returned by
144       "write" should be used to generate the hash. Usually, "write" and "read"
145       return different booleans. And "raw" returns the same boolean as "write".
146
147     Note: The 'raw' transform is used for changegroup generation and in some
148     debug commands. In this case the transform only indicates whether the
149     contents can be used for hash integrity checks.
150     """
151     if not flag & REVIDX_KNOWN_FLAGS:
152         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153         raise ProgrammingError(msg)
154     if flag not in REVIDX_FLAGS_ORDER:
155         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156         raise ProgrammingError(msg)
157     if flag in _flagprocessors:
158         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159         raise error.Abort(msg)
160     _flagprocessors[flag] = processor
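# Illustration: a minimal sketch of the (read, write, raw) contract above,
# registering a no-op processor on the existing REVIDX_EXTSTORED flag. The
# _noop* names are hypothetical, and the lines stay commented out so the
# module's behaviour is unchanged:
#
# def _noopread(rl, rawtext):
#     return rawtext, True        # text == rawtext, usable for hash checks
# def _noopwrite(rl, text):
#     return text, True           # hash is generated from this rawtext
# def _noopraw(rl, rawtext):
#     return True                 # same boolean as the "write" transform
# addflagprocessor(REVIDX_EXTSTORED, (_noopread, _noopwrite, _noopraw))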
161
162 def getoffset(q):
163     return int(q >> 16)
164
165 def gettype(q):
166     return int(q & 0xFFFF)
167
168 def offset_type(offset, type):
169     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170         raise ValueError('unknown revlog index flags')
171     return int(int(offset) << 16 | type)
172
173 _nullhash = hashlib.sha1(nullid)
174
175 def hash(text, p1, p2):
176     """generate a hash from the given text and its parent hashes
177
178     This hash combines both the current file contents and its history
179     in a manner that makes it easy to distinguish nodes with the same
180     content in the revision graph.
181     """
182     # As of now, if one of the parent nodes is null, p2 is null
183     if p2 == nullid:
184         # deep copy of a hash is faster than creating one
185         s = _nullhash.copy()
186         s.update(p1)
187     else:
188         # none of the parent nodes are nullid
189         if p1 < p2:
190             a = p1
191             b = p2
192         else:
193             a = p2
194             b = p1
195         s = hashlib.sha1(a)
196         s.update(b)
197     s.update(text)
198     return s.digest()
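# Parent order does not affect the result, since parents are sorted before
# hashing. A doctest-style illustration with hypothetical 20-byte parents:
#
# >>> p, q = b'\x11' * 20, b'\x22' * 20
# >>> hash(b'data', p, q) == hash(b'data', q, p)
# True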
199
200 class _testrevlog(object):
201     """minimalist fake revlog to use in doctests"""
202
203     def __init__(self, data, density=0.5, mingap=0):
204         """data is a list of revision payload boundaries"""
205         self._data = data
206         self._srdensitythreshold = density
207         self._srmingapsize = mingap
208
209     def start(self, rev):
210         if rev == 0:
211             return 0
212         return self._data[rev - 1]
213
214     def end(self, rev):
215         return self._data[rev]
216
217     def length(self, rev):
218         return self.end(rev) - self.start(rev)
219
220     def __len__(self):
221         return len(self._data)
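# The constructor argument is a list of cumulative end offsets, so for
# instance (doctest-style illustration with hypothetical boundaries):
#
# >>> rl = _testrevlog([5, 10, 12])
# >>> rl.start(1), rl.end(1), rl.length(1)
# (5, 10, 5)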
222
223 def _trimchunk(revlog, revs, startidx, endidx=None):
224     """returns revs[startidx:endidx] without empty trailing revs
225
226     Doctest Setup
227     >>> revlog = _testrevlog([
228     ... 5, #0
229     ... 10, #1
230     ... 12, #2
231     ... 12, #3 (empty)
232     ... 17, #4
233     ... 21, #5
234     ... 21, #6 (empty)
235     ... ])
236
237     Contiguous cases:
238     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239     [0, 1, 2, 3, 4, 5]
240     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241     [0, 1, 2, 3, 4]
242     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243     [0, 1, 2]
244     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245     [2]
246     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247     [3, 4, 5]
248     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249     [3, 4]
250
251     Discontiguous cases:
252     >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253     [1, 3, 5]
254     >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255     [1]
256     >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257     [3, 5]
258     >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259     [3, 5]
260     """
261     length = revlog.length
262
263     if endidx is None:
264         endidx = len(revs)
265
266     # If we have a non-empty delta candidate, there is nothing to trim
267     if revs[endidx - 1] < len(revlog):
268         # Trim empty revs at the end, except the very first revision of a chain
269         while (endidx > 1
270                and endidx > startidx
271                and length(revs[endidx - 1]) == 0):
272             endidx -= 1
273
274     return revs[startidx:endidx]
275
276 def _segmentspan(revlog, revs, deltainfo=None):
277     """Get the byte span of a segment of revisions
278
279     revs is a sorted array of revision numbers
280
281     >>> revlog = _testrevlog([
282     ... 5, #0
283     ... 10, #1
284     ... 12, #2
285     ... 12, #3 (empty)
286     ... 17, #4
287     ... ])
288
289     >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
290     17
291     >>> _segmentspan(revlog, [0, 4])
292     17
293     >>> _segmentspan(revlog, [3, 4])
294     5
295     >>> _segmentspan(revlog, [1, 2, 3,])
296     7
297     >>> _segmentspan(revlog, [1, 3])
298     7
299     """
300     if not revs:
301         return 0
302     if deltainfo is not None and len(revlog) <= revs[-1]:
303         if len(revs) == 1:
304             return deltainfo.deltalen
305         offset = revlog.end(len(revlog) - 1)
306         end = deltainfo.deltalen + offset
307     else:
308         end = revlog.end(revs[-1])
309     return end - revlog.start(revs[0])
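# When ``deltainfo`` describes a revision about to be appended (its number
# equals len(revlog)), the span extends past the current data end by
# deltainfo.deltalen. With the doctest revlog above (data ends at byte 17)
# and a hypothetical deltainfo whose deltalen is 6:
#
#   _segmentspan(revlog, [3, 5], deltainfo)
#   # end = 17 + 6 = 23, start(3) = 12, so the span is 23 - 12 = 11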
310
311 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
312     """slice revs to reduce the amount of unrelated data to be read from disk.
313
314     ``revs`` is sliced into groups that should be read in one time.
315     Assume that revs are sorted.
316
317     The initial chunk is sliced until the overall density (payload/chunks-span
318     ratio) is above `revlog._srdensitythreshold`. No gap smaller than
319     `revlog._srmingapsize` is skipped.
320
321     If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
322     For consistency with other slicing choices, this limit won't go lower than
323     `revlog._srmingapsize`.
324
325     If an individual revision chunk is larger than this limit, it will still
326     be yielded individually.
327
328     >>> revlog = _testrevlog([
329     ... 5, #00 (5)
330     ... 10, #01 (5)
331     ... 12, #02 (2)
332     ... 12, #03 (empty)
333     ... 27, #04 (15)
334     ... 31, #05 (4)
335     ... 31, #06 (empty)
336     ... 42, #07 (11)
337     ... 47, #08 (5)
338     ... 47, #09 (empty)
339     ... 48, #10 (1)
340     ... 51, #11 (3)
341     ... 74, #12 (23)
342     ... 85, #13 (11)
343     ... 86, #14 (1)
344     ... 91, #15 (5)
345     ... ])
346
347     >>> list(_slicechunk(revlog, list(range(16))))
348     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
349     >>> list(_slicechunk(revlog, [0, 15]))
350     [[0], [15]]
351     >>> list(_slicechunk(revlog, [0, 11, 15]))
352     [[0], [11], [15]]
353     >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
354     [[0], [11, 13, 15]]
355     >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
356     [[1, 2], [5, 8, 10, 11], [14]]
357
358     Slicing with a maximum chunk size
359     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
360     [[0], [11], [13], [15]]
361     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
362     [[0], [11], [13, 15]]
363     """
364     if targetsize is not None:
365         targetsize = max(targetsize, revlog._srmingapsize)
366     # targetsize should not be specified when evaluating delta candidates:
367     # * targetsize is used to ensure we stay within specification when reading,
368     # * deltainfo is used to pick a good delta chain when writing.
369     if not (deltainfo is None or targetsize is None):
370         msg = 'cannot use `targetsize` with a `deltainfo`'
371         raise error.ProgrammingError(msg)
372     for chunk in _slicechunktodensity(revlog, revs,
373                                       deltainfo,
374                                       revlog._srdensitythreshold,
375                                       revlog._srmingapsize):
376         for subchunk in _slicechunktosize(revlog, chunk, targetsize):
377             yield subchunk
378
379 def _slicechunktosize(revlog, revs, targetsize=None):
380     """slice revs to match the target size
381
382     This is intended to be used on chunks that density slicing selected,
383     but that are still too large compared to the read guarantee of revlog.
384     This might happen when the "minimal gap size" interrupted the slicing,
385     or when chains are built in a way that creates large blocks next to each other.
386
387     >>> revlog = _testrevlog([
388     ... 3, #0 (3)
389     ... 5, #1 (2)
390     ... 6, #2 (1)
391     ... 8, #3 (2)
392     ... 8, #4 (empty)
393     ... 11, #5 (3)
394     ... 12, #6 (1)
395     ... 13, #7 (1)
396     ... 14, #8 (1)
397     ... ])
398
399     Cases where chunk is already small enough
400     >>> list(_slicechunktosize(revlog, [0], 3))
401     [[0]]
402     >>> list(_slicechunktosize(revlog, [6, 7], 3))
403     [[6, 7]]
404     >>> list(_slicechunktosize(revlog, [0], None))
405     [[0]]
406     >>> list(_slicechunktosize(revlog, [6, 7], None))
407     [[6, 7]]
408
409     cases where we need actual slicing
410     >>> list(_slicechunktosize(revlog, [0, 1], 3))
411     [[0], [1]]
412     >>> list(_slicechunktosize(revlog, [1, 3], 3))
413     [[1], [3]]
414     >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
415     [[1, 2], [3]]
416     >>> list(_slicechunktosize(revlog, [3, 5], 3))
417     [[3], [5]]
418     >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
419     [[3], [5]]
420     >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
421     [[5], [6, 7, 8]]
422     >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
423     [[0], [1, 2], [3], [5], [6, 7, 8]]
424
425     Case with too large individual chunk (must return valid chunk)
426     >>> list(_slicechunktosize(revlog, [0, 1], 2))
427     [[0], [1]]
428     >>> list(_slicechunktosize(revlog, [1, 3], 1))
429     [[1], [3]]
430     >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
431     [[3], [5]]
432     """
433     assert targetsize is None or 0 <= targetsize
434     if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
435         yield revs
436         return
437
438     startrevidx = 0
439     startdata = revlog.start(revs[0])
440     endrevidx = 0
441     iterrevs = enumerate(revs)
442     next(iterrevs) # skip first rev.
443     for idx, r in iterrevs:
444         span = revlog.end(r) - startdata
445         if span <= targetsize:
446             endrevidx = idx
447         else:
448             chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
449             if chunk:
450                 yield chunk
451             startrevidx = idx
452             startdata = revlog.start(r)
453             endrevidx = idx
454     yield _trimchunk(revlog, revs, startrevidx)
455
456 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
457                          mingapsize=0):
458     """slice revs to reduce the amount of unrelated data to be read from disk.
459
460     ``revs`` is sliced into groups that should be read in one time.
461     Assume that revs are sorted.
462
463     ``deltainfo`` is a _deltainfo instance of a revision that we would append
464     to the top of the revlog.
465
466     The initial chunk is sliced until the overall density (payload/chunks-span
467     ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
468     skipped.
469
470     >>> revlog = _testrevlog([
471     ... 5, #00 (5)
472     ... 10, #01 (5)
473     ... 12, #02 (2)
474     ... 12, #03 (empty)
475     ... 27, #04 (15)
476     ... 31, #05 (4)
477     ... 31, #06 (empty)
478     ... 42, #07 (11)
479     ... 47, #08 (5)
480     ... 47, #09 (empty)
481     ... 48, #10 (1)
482     ... 51, #11 (3)
483     ... 74, #12 (23)
484     ... 85, #13 (11)
485     ... 86, #14 (1)
486     ... 91, #15 (5)
487     ... ])
488
489     >>> list(_slicechunktodensity(revlog, list(range(16))))
490     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
491     >>> list(_slicechunktodensity(revlog, [0, 15]))
492     [[0], [15]]
493     >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
494     [[0], [11], [15]]
495     >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
496     [[0], [11, 13, 15]]
497     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
498     [[1, 2], [5, 8, 10, 11], [14]]
499     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
500     ...                           mingapsize=20))
501     [[1, 2, 3, 5, 8, 10, 11], [14]]
502     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
503     ...                           targetdensity=0.95))
504     [[1, 2], [5], [8, 10, 11], [14]]
505     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
506     ...                           targetdensity=0.95, mingapsize=12))
507     [[1, 2], [5, 8, 10, 11], [14]]
508     """
509     start = revlog.start
510     length = revlog.length
511
512     if len(revs) <= 1:
513         yield revs
514         return
515
516     nextrev = len(revlog)
517     nextoffset = revlog.end(nextrev - 1)
518
519     if deltainfo is None:
520         deltachainspan = _segmentspan(revlog, revs)
521         chainpayload = sum(length(r) for r in revs)
522     else:
523         deltachainspan = deltainfo.distance
524         chainpayload = deltainfo.compresseddeltalen
525
526     if deltachainspan < mingapsize:
527         yield revs
528         return
529
530     readdata = deltachainspan
531
532     if deltachainspan:
533         density = chainpayload / float(deltachainspan)
534     else:
535         density = 1.0
536
537     if density >= targetdensity:
538         yield revs
539         return
540
541     if deltainfo is not None and deltainfo.deltalen:
542         revs = list(revs)
543         revs.append(nextrev)
544
545     # Store the gaps in a heap to have them sorted by decreasing size
546     gapsheap = []
547     heapq.heapify(gapsheap)
548     prevend = None
549     for i, rev in enumerate(revs):
550         if rev < nextrev:
551             revstart = start(rev)
552             revlen = length(rev)
553         else:
554             revstart = nextoffset
555             revlen = deltainfo.deltalen
556
557         # Skip empty revisions to form larger holes
558         if revlen == 0:
559             continue
560
561         if prevend is not None:
562             gapsize = revstart - prevend
563             # only consider holes that are large enough
564             if gapsize > mingapsize:
565                 heapq.heappush(gapsheap, (-gapsize, i))
566
567         prevend = revstart + revlen
568
569     # Collect the indices of the largest holes until the density is acceptable
570     indicesheap = []
571     heapq.heapify(indicesheap)
572     while gapsheap and density < targetdensity:
573         oppgapsize, gapidx = heapq.heappop(gapsheap)
574
575         heapq.heappush(indicesheap, gapidx)
576
577         # the gap sizes are stored as negatives to be sorted decreasingly
578         # by the heap
579         readdata -= (-oppgapsize)
580         if readdata > 0:
581             density = chainpayload / float(readdata)
582         else:
583             density = 1.0
584
585     # Cut the revs at collected indices
586     previdx = 0
587     while indicesheap:
588         idx = heapq.heappop(indicesheap)
589
590         chunk = _trimchunk(revlog, revs, previdx, idx)
591         if chunk:
592             yield chunk
593
594         previdx = idx
595
596     chunk = _trimchunk(revlog, revs, previdx)
597     if chunk:
598         yield chunk
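# Worked example for the density cut, using the doctest revlog above with
# revs = [0, 15]: chainpayload = 5 + 5 = 10 bytes, while the initial
# readdata covers the full 91-byte span, so density = 10/91.0 ~ 0.11, below
# the 0.5 target. The single 81-byte gap (between the end of revision 0 at
# offset 5 and the start of revision 15 at offset 86) is then removed,
# readdata drops to 10, density reaches 1.0, and the revs are cut into
# [[0], [15]].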
599
600 @attr.s(slots=True, frozen=True)
601 class _deltainfo(object):
602     distance = attr.ib()
603     deltalen = attr.ib()
604     data = attr.ib()
605     base = attr.ib()
606     chainbase = attr.ib()
607     chainlen = attr.ib()
608     compresseddeltalen = attr.ib()
609     snapshotdepth = attr.ib()
610
611 class _deltacomputer(object):
612     def __init__(self, revlog):
613         self.revlog = revlog
614
615     def _getcandidaterevs(self, p1, p2, cachedelta):
616         """
617         Provides candidate revisions that are worth diffing against,
618         grouped by level of ease.
619         """
620         revlog = self.revlog
621         gdelta = revlog._generaldelta
622         curr = len(revlog)
623         prev = curr - 1
624         p1r, p2r = revlog.rev(p1), revlog.rev(p2)
625
626         # should we try to build a delta?
627         if prev != nullrev and revlog.storedeltachains:
628             tested = set()
629             # This condition is true most of the time when processing
630             # changegroup data into a generaldelta repo. The only time it
631             # isn't true is if this is the first revision in a delta chain
632             # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
633             if cachedelta and gdelta and revlog._lazydeltabase:
634                 # Assume what we received from the server is a good choice
635                 # build delta will reuse the cache
636                 yield (cachedelta[0],)
637                 tested.add(cachedelta[0])
638
639             if gdelta:
640                 # exclude already lazy tested base if any
641                 parents = [p for p in (p1r, p2r)
642                            if p != nullrev and p not in tested]
643
644                 if not revlog._deltabothparents and len(parents) == 2:
645                     parents.sort()
646                     # To minimize the chance of having to build a fulltext,
647                     # pick first whichever parent is closest to us (max rev)
648                     yield (parents[1],)
649                     # then the other one (min rev) if the first did not fit
650                     yield (parents[0],)
651                     tested.update(parents)
652                 elif len(parents) > 0:
653                     # Test all parents (1 or 2), and keep the best candidate
654                     yield parents
655                     tested.update(parents)
656
657             if prev not in tested:
658                 # other approaches failed; try against prev to hopefully save
659                 # us a fulltext.
660                 yield (prev,)
661                 tested.add(prev)
662
663     def buildtext(self, revinfo, fh):
664         """Builds a fulltext version of a revision
665
666         revinfo: _revisioninfo instance that contains all needed info
667         fh: file handle to either the .i or the .d revlog file,
668             depending on whether it is inlined or not
669         """
670         btext = revinfo.btext
671         if btext[0] is not None:
672             return btext[0]
673
674         revlog = self.revlog
675         cachedelta = revinfo.cachedelta
676         flags = revinfo.flags
677         node = revinfo.node
678
679         baserev = cachedelta[0]
680         delta = cachedelta[1]
681         # special case deltas which replace entire base; no need to decode
682         # base revision. this neatly avoids censored bases, which throw when
683         # they're decoded.
684         hlen = struct.calcsize(">lll")
685         if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
686                                                    len(delta) - hlen):
687             btext[0] = delta[hlen:]
688         else:
689             # deltabase is rawtext before being changed by flag processors,
690             # which is equivalent to non-raw text
691             basetext = revlog.revision(baserev, _df=fh, raw=False)
692             btext[0] = mdiff.patch(basetext, delta)
693
694         try:
695             res = revlog._processflags(btext[0], flags, 'read', raw=True)
696             btext[0], validatehash = res
697             if validatehash:
698                 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
699             if flags & REVIDX_ISCENSORED:
700                 raise RevlogError(_('node %s is not censored') % node)
701         except CensoredNodeError:
702             # must pass the censored index flag to add censored revisions
703             if not flags & REVIDX_ISCENSORED:
704                 raise
705         return btext[0]
706
707     def _builddeltadiff(self, base, revinfo, fh):
708         revlog = self.revlog
709         t = self.buildtext(revinfo, fh)
710         if revlog.iscensored(base):
711             # deltas based on a censored revision must replace the
712             # full content in one patch, so delta works everywhere
713             header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
714             delta = header + t
715         else:
716             ptext = revlog.revision(base, _df=fh, raw=True)
717             delta = mdiff.textdiff(ptext, t)
718
719         return delta
720
721     def _builddeltainfo(self, revinfo, base, fh):
722         # can we use the cached delta?
723         if revinfo.cachedelta and revinfo.cachedelta[0] == base:
724             delta = revinfo.cachedelta[1]
725         else:
726             delta = self._builddeltadiff(base, revinfo, fh)
727         revlog = self.revlog
728         header, data = revlog.compress(delta)
729         deltalen = len(header) + len(data)
730         chainbase = revlog.chainbase(base)
731         offset = revlog.end(len(revlog) - 1)
732         dist = deltalen + offset - revlog.start(chainbase)
733         if revlog._generaldelta:
734             deltabase = base
735         else:
736             deltabase = chainbase
737         chainlen, compresseddeltalen = revlog._chaininfo(base)
738         chainlen += 1
739         compresseddeltalen += deltalen
740
741         revlog = self.revlog
742         snapshotdepth = None
743         if deltabase == nullrev:
744             snapshotdepth = 0
745         elif revlog._sparserevlog and revlog.issnapshot(deltabase):
746             # A delta chain should always be one full snapshot,
747             # zero or more semi-snapshots, and zero or more deltas
748             p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
749             if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
750                 snapshotdepth = len(revlog._deltachain(deltabase)[0])
751
752         return _deltainfo(dist, deltalen, (header, data), deltabase,
753                           chainbase, chainlen, compresseddeltalen,
754                           snapshotdepth)
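    # "dist" above counts the bytes a reader would cross from the start of
    # the chain base to the end of the new delta. With hypothetical numbers:
    # a chain base starting at byte 100, a data file currently ending at
    # byte 500, and a 40-byte compressed delta give
    # dist = 40 + 500 - 100 = 440.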
755
756     def finddeltainfo(self, revinfo, fh):
757         """Find an acceptable delta against a candidate revision
758
759         revinfo: information about the revision (instance of _revisioninfo)
760         fh: file handle to either the .i or the .d revlog file,
761             depending on whether it is inlined or not
762
763         Returns the first acceptable candidate revision, as ordered by
764         _getcandidaterevs
765         """
766         if not revinfo.textlen:
767             return None # empty files do not need deltas
768
769         cachedelta = revinfo.cachedelta
770         p1 = revinfo.p1
771         p2 = revinfo.p2
772         revlog = self.revlog
773
774         deltalength = self.revlog.length
775         deltaparent = self.revlog.deltaparent
776
777         deltainfo = None
778         deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
779         for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
780             # filter out delta bases that will never produce a good delta
781             candidaterevs = [r for r in candidaterevs
782                              if self.revlog.length(r) <= deltas_limit]
783             nominateddeltas = []
784             for candidaterev in candidaterevs:
785                 # skip over empty deltas (no need to include them in a chain)
786                 while candidaterev != nullrev and not deltalength(candidaterev):
787                     candidaterev = deltaparent(candidaterev)
788                 # no need to try a delta against nullid, this will be handled
789                 # by fulltext later.
790                 if candidaterev == nullrev:
791                     continue
792                 # no delta for rawtext-changing revs (see "candelta" for why)
793                 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
794                     continue
795                 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
796                 if revlog._isgooddeltainfo(candidatedelta, revinfo):
797                     nominateddeltas.append(candidatedelta)
798             if nominateddeltas:
799                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
800                 break
801
802         return deltainfo
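    # With LIMIT_DELTA2TEXT = 2 (defined below), a revision whose fulltext
    # is, say, 1000 bytes gets deltas_limit = 2000, so any candidate base
    # whose own stored delta already exceeds 2000 bytes is filtered out
    # above: a chain built on it could not stay within twice the text
    # length.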
803
804 @attr.s(slots=True, frozen=True)
805 class _revisioninfo(object):
806     """Information about a revision that allows building its fulltext
807     node: expected hash of the revision
808     p1, p2: parent revs of the revision
809     btext: built text cache consisting of a one-element list
810     cachedelta: (baserev, uncompressed_delta) or None
811     flags: flags associated with the revision storage
812
813     One of btext[0] or cachedelta must be set.
814     """
815     node = attr.ib()
816     p1 = attr.ib()
817     p2 = attr.ib()
818     btext = attr.ib()
819     textlen = attr.ib()
820     cachedelta = attr.ib()
821     flags = attr.ib()
822
823 # index v0:
824 #  4 bytes: offset
825 #  4 bytes: compressed length
826 #  4 bytes: base rev
827 #  4 bytes: link rev
828 # 20 bytes: parent 1 nodeid
829 # 20 bytes: parent 2 nodeid
830 # 20 bytes: nodeid
831 indexformatv0 = struct.Struct(">4l20s20s20s")
832 indexformatv0_pack = indexformatv0.pack
833 indexformatv0_unpack = indexformatv0.unpack
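# Each v0 index entry is fixed-size: struct.calcsize(">4l20s20s20s")
# = 4 * 4 + 3 * 20 = 76 bytes, i.e. indexformatv0.size == 76.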
834
835 class revlogoldindex(list):
836     def __getitem__(self, i):
837         if i == -1:
838             return (0, 0, 0, -1, -1, -1, -1, nullid)
839         return list.__getitem__(self, i)
840
841 # maximum <delta-chain-data>/<revision-text-length> ratio
842 LIMIT_DELTA2TEXT = 2
843
844 class revlogoldio(object):
845     def __init__(self):
846         self.size = indexformatv0.size
847
848     def parseindex(self, data, inline):
849         s = self.size
850         index = []
851         nodemap = {nullid: nullrev}
852         n = off = 0
853         l = len(data)
854         while off + s <= l:
855             cur = data[off:off + s]
856             off += s
857             e = indexformatv0_unpack(cur)
858             # transform to revlogv1 format
859             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
860                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
861             index.append(e2)
862             nodemap[e[6]] = n
863             n += 1
864
865         return revlogoldindex(index), nodemap, None
866
867     def packentry(self, entry, node, version, rev):
868         if gettype(entry[0]):
869             raise RevlogError(_('index entry flags need revlog version 1'))
870         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
871               node(entry[5]), node(entry[6]), entry[7])
872         return indexformatv0_pack(*e2)
873
874 # index ng:
875 #  6 bytes: offset
876 #  2 bytes: flags
877 #  4 bytes: compressed length
878 #  4 bytes: uncompressed length
879 #  4 bytes: base rev
880 #  4 bytes: link rev
881 #  4 bytes: parent 1 rev
882 #  4 bytes: parent 2 rev
883 # 32 bytes: nodeid
884 indexformatng = struct.Struct(">Qiiiiii20s12x")
885 indexformatng_pack = indexformatng.pack
886 versionformat = struct.Struct(">I")
887 versionformat_pack = versionformat.pack
888 versionformat_unpack = versionformat.unpack
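# Each "ng" index entry is 8 + 6 * 4 + 20 + 12 = 64 bytes
# (indexformatng.size == struct.calcsize(">Qiiiiii20s12x")); the 20-byte
# nodeid plus 12 pad bytes fill the 32 bytes the format reserves for the
# hash.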
889
890 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
891 # signed integer)
892 _maxentrysize = 0x7fffffff
893
894 class revlogio(object):
895     def __init__(self):
896         self.size = indexformatng.size
897
898     def parseindex(self, data, inline):
899         # call the C implementation to parse the index data
900         index, cache = parsers.parse_index2(data, inline)
901         return index, getattr(index, 'nodemap', None), cache
902
903     def packentry(self, entry, node, version, rev):
904         p = indexformatng_pack(*entry)
905         if rev == 0:
906             p = versionformat_pack(version) + p[4:]
907         return p
908
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        #  When True, indexfile is opened with checkambig=True at writing, to
        #  avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

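        # Worked note (illustrative numbers): the bitwise test above is the
        # standard power-of-two check -- a power of two has exactly one bit
        # set, so subtracting 1 flips all lower bits and the AND is zero:
        #   65536 & 65535 == 0  -> accepted
        #   65000 & 64999 != 0  -> rejected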
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

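        # Worked example (illustrative header value): a plain v1 inline
        # revlog stores 0x00010001 in its first four bytes, so
        #   fmt   = 0x00010001 & 0xFFFF   == 1        (REVLOGV1)
        #   flags = 0x00010001 & ~0xFFFF  == 0x10000  (FLAG_INLINE_DATA)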
        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

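    # Illustration of the bounds handling in revs() (arbitrary values):
    #   revs(start=2, stop=5) yields 2, 3, 4, 5 -- stop is inclusive
    #   revs(start=5, stop=2) yields 5, 4, 3, 2 -- descending when start > stop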
    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

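    # Worked example: an entry storing offset 0x1234 with flags 0x0001 holds
    # (0x1234 << 16) | 0x0001 == 0x12340001 in its first field, so
    #   start(): 0x12340001 >> 16    == 0x1234
    #   flags(): 0x12340001 & 0xFFFF == 0x0001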
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

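    # Walked example (hypothetical index): with generaldelta and base
    # pointers rev -> base of {0: 0, 1: 0, 2: 1, 3: 2}, _deltachain(3)
    # follows 3 -> 2 -> 1 -> 0, stops where base == rev, and returns
    # ([0, 1, 2, 3], False); _deltachain(3, stoprev=1) returns ([2, 3], True).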
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

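    # Note on lazyset above: membership consults both the eagerly-added revs
    # and the lazy ancestor generator, so after has.add(nullrev) and
    # has.update(common) a query like `r in has` can succeed without
    # materializing the full ancestor set of common.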
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

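    # Walked example (hypothetical graph): with parent revs {0: none, 1: 0,
    # 2: 0, 3: 1}, every rev first marks itself with ishead[r] = 1, then
    # clears its parents' slots; revs 0 and 1 get cleared, leaving [2, 3].
    # The extra slot in ishead absorbs the writes for nullrev (-1) parents.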
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

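    # Note on the a > b shortcut above: revision numbers are assigned in
    # topological order, so an ancestor always has a lower number than its
    # descendants; a higher-numbered rev can never be an ancestor of a
    # lower-numbered one.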
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

1820 def _partialmatch(self, id):
1821 # we don't care about wdirfilenodeids as they should always be full hashes
1822 maybewdir = wdirhex.startswith(id)
1823 try:
1824 partial = self.index.partialmatch(id)
1825 if partial and self.hasnode(partial):
1826 if maybewdir:
1827 # single 'ff...' match in radix tree, ambiguous with wdir
1828 raise RevlogError
1829 return partial
1830 if maybewdir:
1831 # no 'ff...' match in radix tree, wdir identified
1832 raise error.WdirUnsupported
1833 return None
1834 except RevlogError:
1835 # parsers.c radix tree lookup gave multiple matches
1836 # fast path: for unfiltered changelog, radix tree is accurate
1837 if not getattr(self, 'filteredrevs', None):
1838 raise AmbiguousPrefixLookupError(id, self.indexfile,
1839 _('ambiguous identifier'))
1840 # fall through to slow path that filters hidden revisions
1841 except (AttributeError, ValueError):
1842 # we are pure python, or key was too short to search radix tree
1843 pass
1844
1845 if id in self._pcache:
1846 return self._pcache[id]
1847
1848 if len(id) <= 40:
1849 try:
1850 # hex(node)[:...]
1851 l = len(id) // 2 # grab an even number of digits
1852 prefix = bin(id[:l * 2])
1853 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1854 nl = [n for n in nl if hex(n).startswith(id) and
1855 self.hasnode(n)]
1856 if len(nl) > 0:
1857 if len(nl) == 1 and not maybewdir:
1858 self._pcache[id] = nl[0]
1859 return nl[0]
1860 raise AmbiguousPrefixLookupError(id, self.indexfile,
1861 _('ambiguous identifier'))
1862 if maybewdir:
1863 raise error.WdirUnsupported
1864 return None
1865 except TypeError:
1866 pass
1867
1868 def lookup(self, id):
1869 """locate a node based on:
1870 - revision number or str(revision number)
1871 - nodeid or subset of hex nodeid
1872 """
1873 n = self._match(id)
1874 if n is not None:
1875 return n
1876 n = self._partialmatch(id)
1877 if n:
1878 return n
1879
1880 raise LookupError(id, self.indexfile, _('no match found'))
1881
1882 def shortest(self, node, minlength=1):
1883 """Find the shortest unambiguous prefix that matches node."""
1884 def isvalid(prefix):
1885 try:
1886 node = self._partialmatch(prefix)
1887 except error.RevlogError:
1888 return False
1889 except error.WdirUnsupported:
1890 # single 'ff...' match
1891 return True
1892 if node is None:
1893 raise LookupError(node, self.indexfile, _('no node'))
1894 return True
1895
1896 def maybewdir(prefix):
1897 return all(c == 'f' for c in prefix)
1898
1899 hexnode = hex(node)
1900
1901 def disambiguate(hexnode, minlength):
1902 """Disambiguate against wdirid."""
1903 for length in range(minlength, 41):
1904 prefix = hexnode[:length]
1905 if not maybewdir(prefix):
1906 return prefix
1907
1908 if not getattr(self, 'filteredrevs', None):
1909 try:
1910 length = max(self.index.shortest(node), minlength)
1911 return disambiguate(hexnode, length)
1912 except RevlogError:
1913 if node != wdirid:
1914 raise LookupError(node, self.indexfile, _('no node'))
1915 except AttributeError:
1916 # Fall through to pure code
1917 pass
1918
1919 if node == wdirid:
1920 for length in range(minlength, 41):
1921 prefix = hexnode[:length]
1922 if isvalid(prefix):
1923 return prefix
1924
1925 for length in range(minlength, 41):
1926 prefix = hexnode[:length]
1927 if isvalid(prefix):
1928 return disambiguate(hexnode, length)
1929
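# Editor's note: a self-contained sketch of the prefix-disambiguation idea
# behind shortest() above, over a plain list of hex ids instead of the
# index/radix tree. The 'nodes' values and 'toyshortest' are invented for
# illustration; all-'f' prefixes stay ambiguous with the virtual wdir id.
nodes = ['a98e926b1f8d', 'a9214faa2875', 'ffffffffffff']

def toyshortest(hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [n for n in nodes if n.startswith(prefix)]
        if len(matches) == 1 and not all(c == 'f' for c in prefix):
            return prefix
    return hexnode

assert toyshortest('a98e926b1f8d') == 'a98'
assert toyshortest('a9214faa2875') == 'a92'
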
1930 def cmp(self, node, text):
1931 """compare text with a given file revision
1932
1933 returns True if text is different than what is stored.
1934 """
1935 p1, p2 = self.parents(node)
1936 return hash(text, p1, p2) != node
1937
1938 def _cachesegment(self, offset, data):
1939 """Add a segment to the revlog cache.
1940
1941 Accepts an absolute offset and the data that is at that location.
1942 """
1943 o, d = self._chunkcache
1944 # try to add to existing cache
1945 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1946 self._chunkcache = o, d + data
1947 else:
1948 self._chunkcache = offset, data
1949
1950 def _readsegment(self, offset, length, df=None):
1951 """Load a segment of raw data from the revlog.
1952
1953 Accepts an absolute offset, length to read, and an optional existing
1954 file handle to read from.
1955
1956 If an existing file handle is passed, it will be seeked and the
1957 original seek position will NOT be restored.
1958
1959 Returns a str or buffer of raw byte data.
1960 """
1961 # Cache data both forward and backward around the requested
1962 # data, in a fixed size window. This helps speed up operations
1963 # involving reading the revlog backwards.
1964 cachesize = self._chunkcachesize
1965 realoffset = offset & ~(cachesize - 1)
1966 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1967 - realoffset)
1968 with self._datareadfp(df) as df:
1969 df.seek(realoffset)
1970 d = df.read(reallength)
1971 self._cachesegment(realoffset, d)
1972 if offset != realoffset or reallength != length:
1973 return util.buffer(d, offset - realoffset, length)
1974 return d
1975
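# Editor's note: the bitmask arithmetic in _readsegment() rounds the read
# window outward to cache-size boundaries. A standalone sketch with made-up
# numbers (the cache size must be a power of two for the masks to work):
cachesize = 65536
offset, length = 70000, 1000
realoffset = offset & ~(cachesize - 1)             # round down: 65536
reallength = (((offset + length + cachesize)
               & ~(cachesize - 1)) - realoffset)   # round up past the end
assert realoffset == 65536
assert realoffset + reallength >= offset + length  # window covers the request
assert reallength % cachesize == 0                 # whole cache-sized blocks
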
1976 def _getsegment(self, offset, length, df=None):
1977 """Obtain a segment of raw data from the revlog.
1978
1979 Accepts an absolute offset, length of bytes to obtain, and an
1980 optional file handle to the already-opened revlog. If the file
1981 handle is used, its original seek position will not be preserved.
1982
1983 Requests for data may be returned from a cache.
1984
1985 Returns a str or a buffer instance of raw byte data.
1986 """
1987 o, d = self._chunkcache
1988 l = len(d)
1989
1990 # is it in the cache?
1991 cachestart = offset - o
1992 cacheend = cachestart + length
1993 if cachestart >= 0 and cacheend <= l:
1994 if cachestart == 0 and cacheend == l:
1995 return d # avoid a copy
1996 return util.buffer(d, cachestart, cacheend - cachestart)
1997
1998 return self._readsegment(offset, length, df=df)
1999
2000 def _getsegmentforrevs(self, startrev, endrev, df=None):
2001 """Obtain a segment of raw data corresponding to a range of revisions.
2002
2003 Accepts the start and end revisions and an optional already-open
2004 file handle to be used for reading. If the file handle is read, its
2005 seek position will not be preserved.
2006
2007 Requests for data may be satisfied by a cache.
2008
2009 Returns a 2-tuple of (offset, data) for the requested range of
2010 revisions. Offset is the integer offset from the beginning of the
2011 revlog and data is a str or buffer of the raw byte data.
2012
2013 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2014 to determine where each revision's data begins and ends.
2015 """
2016 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2017 # (functions are expensive).
2018 index = self.index
2019 istart = index[startrev]
2020 start = int(istart[0] >> 16)
2021 if startrev == endrev:
2022 end = start + istart[1]
2023 else:
2024 iend = index[endrev]
2025 end = int(iend[0] >> 16) + iend[1]
2026
2027 if self._inline:
2028 start += (startrev + 1) * self._io.size
2029 end += (endrev + 1) * self._io.size
2030 length = end - start
2031
2032 return start, self._getsegment(start, length, df=df)
2033
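# Editor's note: for inline revlogs the revision data is interleaved with
# index entries, so _getsegmentforrevs() shifts offsets by one index-entry
# size per revision. A toy check of that shift; the values here are assumed
# for illustration (iosize=64 matches the v1 index entry size):
iosize = 64
datastart = 128          # made-up data offset of revision r
r = 2
inlinestart = datastart + (r + 1) * iosize
assert inlinestart == 128 + 3 * 64
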
2034 def _chunk(self, rev, df=None):
2035 """Obtain a single decompressed chunk for a revision.
2036
2037 Accepts an integer revision and an optional already-open file handle
2038 to be used for reading. If used, the seek position of the file will not
2039 be preserved.
2040
2041 Returns a str holding uncompressed data for the requested revision.
2042 """
2043 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2044
2045 def _chunks(self, revs, df=None, targetsize=None):
2046 """Obtain decompressed chunks for the specified revisions.
2047
2048 Accepts an iterable of numeric revisions that are assumed to be in
2049 ascending order. Also accepts an optional already-open file handle
2050 to be used for reading. If used, the seek position of the file will
2051 not be preserved.
2052
2053 This function is similar to calling ``self._chunk()`` multiple times,
2054 but is faster.
2055
2056 Returns a list with decompressed data for each requested revision.
2057 """
2058 if not revs:
2059 return []
2060 start = self.start
2061 length = self.length
2062 inline = self._inline
2063 iosize = self._io.size
2064 buffer = util.buffer
2065
2066 l = []
2067 ladd = l.append
2068
2069 if not self._withsparseread:
2070 slicedchunks = (revs,)
2071 else:
2072 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2073
2074 for revschunk in slicedchunks:
2075 firstrev = revschunk[0]
2076 # Skip trailing revisions with empty diff
2077 for lastrev in revschunk[::-1]:
2078 if length(lastrev) != 0:
2079 break
2080
2081 try:
2082 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2083 except OverflowError:
2084 # issue4215 - we can't cache a run of chunks greater than
2085 # 2G on Windows
2086 return [self._chunk(rev, df=df) for rev in revschunk]
2087
2088 decomp = self.decompress
2089 for rev in revschunk:
2090 chunkstart = start(rev)
2091 if inline:
2092 chunkstart += (rev + 1) * iosize
2093 chunklength = length(rev)
2094 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2095
2096 return l
2097
2098 def _chunkclear(self):
2099 """Clear the raw chunk cache."""
2100 self._chunkcache = (0, '')
2101
2102 def deltaparent(self, rev):
2103 """return deltaparent of the given revision"""
2104 base = self.index[rev][3]
2105 if base == rev:
2106 return nullrev
2107 elif self._generaldelta:
2108 return base
2109 else:
2110 return rev - 1
2111
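# Editor's note: a sketch of the two delta-base conventions handled by
# deltaparent() above. With generaldelta the stored base field points at an
# arbitrary earlier revision; without it the base is implicitly rev - 1.
# 'basefield' and 'toydeltaparent' are stand-ins for self.index[rev][3] and
# the real method, invented for illustration.
def toydeltaparent(rev, basefield, generaldelta, nullrev=-1):
    if basefield == rev:       # a full snapshot delta-chains to nothing
        return nullrev
    elif generaldelta:
        return basefield
    else:
        return rev - 1

assert toydeltaparent(5, 5, True) == -1    # snapshot
assert toydeltaparent(5, 2, True) == 2     # generaldelta: explicit base
assert toydeltaparent(5, 2, False) == 4    # legacy: always the previous rev
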
2112 def issnapshot(self, rev):
2113 """tells whether rev is a snapshot
2114 """
2115 if rev == nullrev:
2116 return True
2117 deltap = self.deltaparent(rev)
2118 if deltap == nullrev:
2119 return True
2120 p1, p2 = self.parentrevs(rev)
2121 if deltap in (p1, p2):
2122 return False
2123 return self.issnapshot(deltap)
2124
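# Editor's note: issnapshot() recurses up the delta chain; a revision whose
# delta parent is one of its own parents is a regular delta, while an
# unbroken chain of non-parent bases ends in an (intermediate) snapshot. A
# toy model with invented stand-in tables, not revlog data:
nullrev = -1
deltaparents = {0: nullrev, 1: 0, 2: 0}
parentpairs = {0: (nullrev, nullrev), 1: (nullrev, nullrev), 2: (0, nullrev)}

def toyissnapshot(rev):
    if rev == nullrev:
        return True
    deltap = deltaparents[rev]
    if deltap == nullrev:
        return True               # full snapshot
    if deltap in parentpairs[rev]:
        return False              # plain delta against a parent
    return toyissnapshot(deltap)  # maybe an intermediate snapshot

assert toyissnapshot(0)           # delta against null: full snapshot
assert toyissnapshot(1)           # delta against a non-parent: snapshot
assert not toyissnapshot(2)       # delta against its own parent
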
2125 def snapshotdepth(self, rev):
2126 """number of snapshots in the chain before this one"""
2127 if not self.issnapshot(rev):
2128 raise ProgrammingError('revision %d not a snapshot' % rev)
2129 return len(self._deltachain(rev)[0]) - 1
2130
2131 def revdiff(self, rev1, rev2):
2132 """return or calculate a delta between two revisions
2133
2134 The delta calculated is in binary form and is intended to be written to
2135 revlog data directly. So this function needs raw revision data.
2136 """
2137 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2138 return bytes(self._chunk(rev2))
2139
2140 return mdiff.textdiff(self.revision(rev1, raw=True),
2141 self.revision(rev2, raw=True))
2142
2143 def revision(self, nodeorrev, _df=None, raw=False):
2144 """return an uncompressed revision of a given node or revision
2145 number.
2146
2147 _df - an existing file handle to read from. (internal-only)
2148 raw - an optional argument specifying if the revision data is to be
2149 treated as raw data when applying flag transforms. 'raw' should be set
2150 to True when generating changegroups or in debug commands.
2151 """
2152 if isinstance(nodeorrev, int):
2153 rev = nodeorrev
2154 node = self.node(rev)
2155 else:
2156 node = nodeorrev
2157 rev = None
2158
2159 cachedrev = None
2160 flags = None
2161 rawtext = None
2162 if node == nullid:
2163 return ""
2164 if self._cache:
2165 if self._cache[0] == node:
2166 # _cache only stores rawtext
2167 if raw:
2168 return self._cache[2]
2169 # duplicated, but good for perf
2170 if rev is None:
2171 rev = self.rev(node)
2172 if flags is None:
2173 flags = self.flags(rev)
2174 # no extra flags set, no flag processor runs, text = rawtext
2175 if flags == REVIDX_DEFAULT_FLAGS:
2176 return self._cache[2]
2177 # rawtext is reusable. need to run flag processor
2178 rawtext = self._cache[2]
2179
2180 cachedrev = self._cache[1]
2181
2182 # look up what we need to read
2183 if rawtext is None:
2184 if rev is None:
2185 rev = self.rev(node)
2186
2187 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2188 if stopped:
2189 rawtext = self._cache[2]
2190
2191 # drop cache to save memory
2192 self._cache = None
2193
2194 targetsize = None
2195 rawsize = self.index[rev][2]
2196 if 0 <= rawsize:
2197 targetsize = 4 * rawsize
2198
2199 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2200 if rawtext is None:
2201 rawtext = bytes(bins[0])
2202 bins = bins[1:]
2203
2204 rawtext = mdiff.patches(rawtext, bins)
2205 self._cache = (node, rev, rawtext)
2206
2207 if flags is None:
2208 if rev is None:
2209 rev = self.rev(node)
2210 flags = self.flags(rev)
2211
2212 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2213 if validatehash:
2214 self.checkhash(text, node, rev=rev)
2215
2216 return text
2217
2218 def hash(self, text, p1, p2):
2219 """Compute a node hash.
2220
2221 Available as a function so that subclasses can replace the hash
2222 as needed.
2223 """
2224 return hash(text, p1, p2)
2225
2226 def _processflags(self, text, flags, operation, raw=False):
2227 """Inspect revision data flags and apply transforms defined by
2228 registered flag processors.
2229
2230 ``text`` - the revision data to process
2231 ``flags`` - the revision flags
2232 ``operation`` - the operation being performed (read or write)
2233 ``raw`` - an optional argument describing if the raw transform should be
2234 applied.
2235
2236 This method processes the flags in the order (or reverse order if
2237 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2238 flag processors registered for present flags. The order of flags defined
2239 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2240
2241 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2242 processed text and ``validatehash`` is a bool indicating whether the
2243 returned text should be checked for hash integrity.
2244
2245 Note: If the ``raw`` argument is set, it has precedence over the
2246 operation and will only update the value of ``validatehash``.
2247 """
2248 # fast path: no flag processors will run
2249 if flags == 0:
2250 return text, True
2251 if not operation in ('read', 'write'):
2252 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
2253 # Check all flags are known.
2254 if flags & ~REVIDX_KNOWN_FLAGS:
2255 raise RevlogError(_("incompatible revision flag '%#x'") %
2256 (flags & ~REVIDX_KNOWN_FLAGS))
2257 validatehash = True
2258 # Depending on the operation (read or write), the order might be
2259 # reversed due to non-commutative transforms.
2260 orderedflags = REVIDX_FLAGS_ORDER
2261 if operation == 'write':
2262 orderedflags = reversed(orderedflags)
2263
2264 for flag in orderedflags:
2265 # If a flagprocessor has been registered for a known flag, apply the
2266 # related operation transform and update result tuple.
2267 if flag & flags:
2268 vhash = True
2269
2270 if flag not in _flagprocessors:
2271 message = _("missing processor for flag '%#x'") % (flag)
2272 raise RevlogError(message)
2273
2274 processor = _flagprocessors[flag]
2275 if processor is not None:
2276 readtransform, writetransform, rawtransform = processor
2277
2278 if raw:
2279 vhash = rawtransform(self, text)
2280 elif operation == 'read':
2281 text, vhash = readtransform(self, text)
2282 else: # write operation
2283 text, vhash = writetransform(self, text)
2284 validatehash = validatehash and vhash
2285
2286 return text, validatehash
2287
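# Editor's note: a minimal sketch of the (read, write, raw) processor triple
# that _processflags() dispatches on. The flag value, the transforms and the
# '_toyprocessors' registry here are all invented for illustration; real
# processors are registered elsewhere, not via this dict.
TOYFLAG = 1 << 3

def toyread(self, text):
    return text.replace(b'stored:', b''), True   # decode, keep hash check

def toywrite(self, text):
    return b'stored:' + text, True               # encode on the way in

def toyraw(self, text):
    return True                                  # raw bytes are hash-safe

_toyprocessors = {TOYFLAG: (toyread, toywrite, toyraw)}

encoded, _ = _toyprocessors[TOYFLAG][1](None, b'payload')   # write transform
decoded, _ = _toyprocessors[TOYFLAG][0](None, encoded)      # read transform
assert decoded == b'payload'
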
2288 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2289 """Check node hash integrity.
2290
2291 Available as a function so that subclasses can extend hash mismatch
2292 behaviors as needed.
2293 """
2294 try:
2295 if p1 is None and p2 is None:
2296 p1, p2 = self.parents(node)
2297 if node != self.hash(text, p1, p2):
2298 revornode = rev
2299 if revornode is None:
2300 revornode = templatefilters.short(hex(node))
2301 raise RevlogError(_("integrity check failed on %s:%s")
2302 % (self.indexfile, pycompat.bytestr(revornode)))
2303 except RevlogError:
2304 if self._censorable and _censoredtext(text):
2305 raise error.CensoredNodeError(self.indexfile, node, text)
2306 raise
2307
2308 def _enforceinlinesize(self, tr, fp=None):
2309 """Check if the revlog is too big for inline and convert if so.
2310
2311 This should be called after revisions are added to the revlog. If the
2312 revlog has grown too large to be an inline revlog, it will convert it
2313 to use multiple index and data files.
2314 """
2315 tiprev = len(self) - 1
2316 if (not self._inline or
2317 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2318 return
2319
2320 trinfo = tr.find(self.indexfile)
2321 if trinfo is None:
2322 raise RevlogError(_("%s not found in the transaction")
2323 % self.indexfile)
2324
2325 trindex = trinfo[2]
2326 if trindex is not None:
2327 dataoff = self.start(trindex)
2328 else:
2329 # revlog was stripped at start of transaction, use all leftover data
2330 trindex = len(self) - 1
2331 dataoff = self.end(tiprev)
2332
2333 tr.add(self.datafile, dataoff)
2334
2335 if fp:
2336 fp.flush()
2337 fp.close()
2338
2339 with self._datafp('w') as df:
2340 for r in self:
2341 df.write(self._getsegmentforrevs(r, r)[1])
2342
2343 with self._indexfp('w') as fp:
2344 self.version &= ~FLAG_INLINE_DATA
2345 self._inline = False
2346 io = self._io
2347 for i in self:
2348 e = io.packentry(self.index[i], self.node, self.version, i)
2349 fp.write(e)
2350
2351 # the temp file replaces the real index when we exit the context
2352 # manager
2353
2354 tr.replace(self.indexfile, trindex * self._io.size)
2355 self._chunkclear()
2356
2357 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2358 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2359 """add a revision to the log
2360
2361 text - the revision data to add
2362 transaction - the transaction object used for rollback
2363 link - the linkrev data to add
2364 p1, p2 - the parent nodeids of the revision
2365 cachedelta - an optional precomputed delta
2366 node - nodeid of revision; typically node is not specified, and it is
2367 computed by default as hash(text, p1, p2), however subclasses might
2368 use a different hashing method (and override checkhash() in that case)
2369 flags - the known flags to set on the revision
2370 deltacomputer - an optional _deltacomputer instance shared between
2371 multiple calls
2372 """
2373 if link == nullrev:
2374 raise RevlogError(_("attempted to add linkrev -1 to %s")
2375 % self.indexfile)
2376
2377 if flags:
2378 node = node or self.hash(text, p1, p2)
2379
2380 rawtext, validatehash = self._processflags(text, flags, 'write')
2381
2382 # If the flag processor modifies the revision data, ignore any provided
2383 # cachedelta.
2384 if rawtext != text:
2385 cachedelta = None
2386
2387 if len(rawtext) > _maxentrysize:
2388 raise RevlogError(
2389 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2390 % (self.indexfile, len(rawtext)))
2391
2392 node = node or self.hash(rawtext, p1, p2)
2393 if node in self.nodemap:
2394 return node
2395
2396 if validatehash:
2397 self.checkhash(rawtext, node, p1=p1, p2=p2)
2398
2399 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2400 flags, cachedelta=cachedelta,
2401 deltacomputer=deltacomputer)
2402
2403 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2404 cachedelta=None, deltacomputer=None):
2405 """add a raw revision with known flags, node and parents
2406 useful when reusing a revision not stored in this revlog (e.g. received
2407 over the wire, or read from an external bundle).
2408 """
2409 dfh = None
2410 if not self._inline:
2411 dfh = self._datafp("a+")
2412 ifh = self._indexfp("a+")
2413 try:
2414 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2415 flags, cachedelta, ifh, dfh,
2416 deltacomputer=deltacomputer)
2417 finally:
2418 if dfh:
2419 dfh.close()
2420 ifh.close()
2421
2422 def compress(self, data):
2423 """Generate a possibly-compressed representation of data."""
2424 if not data:
2425 return '', data
2426
2427 compressed = self._compressor.compress(data)
2428
2429 if compressed:
2430 # The revlog compressor added the header in the returned data.
2431 return '', compressed
2432
2433 if data[0:1] == '\0':
2434 return '', data
2435 return 'u', data
2436
2437 def decompress(self, data):
2438 """Decompress a revlog chunk.
2439
2440 The chunk is expected to begin with a header identifying the
2441 format type so it can be routed to an appropriate decompressor.
2442 """
2443 if not data:
2444 return data
2445
2446 # Revlogs are read much more frequently than they are written and many
2447 # chunks only take microseconds to decompress, so performance is
2448 # important here.
2449 #
2450 # We can make a few assumptions about revlogs:
2451 #
2452 # 1) the majority of chunks will be compressed (as opposed to inline
2453 # raw data).
2454 # 2) decompressing *any* data will likely be at least 10x slower than
2455 # returning raw inline data.
2456 # 3) we want to prioritize common and officially supported compression
2457 # engines
2458 #
2459 # It follows that we want to optimize for "decompress compressed data
2460 # when encoded with common and officially supported compression engines"
2461 # case over "raw data" and "data encoded by less common or non-official
2462 # compression engines." That is why we have the inline lookup first
2463 # followed by the compengines lookup.
2464 #
2465 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2466 # compressed chunks. And this matters for changelog and manifest reads.
2467 t = data[0:1]
2468
2469 if t == 'x':
2470 try:
2471 return _zlibdecompress(data)
2472 except zlib.error as e:
2473 raise RevlogError(_('revlog decompress error: %s') %
2474 stringutil.forcebytestr(e))
2475 # '\0' is more common than 'u' so it goes first.
2476 elif t == '\0':
2477 return data
2478 elif t == 'u':
2479 return util.buffer(data, 1)
2480
2481 try:
2482 compressor = self._decompressors[t]
2483 except KeyError:
2484 try:
2485 engine = util.compengines.forrevlogheader(t)
2486 compressor = engine.revlogcompressor()
2487 self._decompressors[t] = compressor
2488 except KeyError:
2489 raise RevlogError(_('unknown compression type %r') % t)
2490
2491 return compressor.decompress(data)
2492
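# Editor's note: a standalone check of the one-byte chunk headers that
# compress()/decompress() rely on: zlib output starts with 'x', 'u' marks
# data stored uncompressed, and '\0' marks data whose first byte is already
# NUL and is therefore stored literally. The sample payloads are made up.
import zlib

chunk = zlib.compress(b'some revision data' * 10)
assert chunk[0:1] == b'x'                     # routed to _zlibdecompress
assert zlib.decompress(chunk) == b'some revision data' * 10

stored = b'u' + b'incompressible'
assert stored[0:1] == b'u'                    # body returned minus the header
assert bytes(memoryview(stored)[1:]) == b'incompressible'
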
2493 def _isgooddeltainfo(self, deltainfo, revinfo):
2494 """Returns True if the given delta is good. Good means that it is within
2495 the disk span, disk size, and chain length bounds that we know to be
2496 performant."""
2497 if deltainfo is None:
2498 return False
2499
2500 # - 'deltainfo.distance' is the distance from the base revision --
2501 # bounding it limits the amount of I/O we need to do.
2502 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2503 # deltas we need to apply -- bounding it limits the amount of CPU
2504 # we consume.
2505
2506 if self._sparserevlog:
2507 # As sparse-read will be used, we can consider that the distance,
2508 # instead of being the span of the whole chunk,
2509 # is the span of the largest read chunk
2510 base = deltainfo.base
2511
2512 if base != nullrev:
2513 deltachain = self._deltachain(base)[0]
2514 else:
2515 deltachain = []
2516
2517 # search for the first non-snapshot revision
2518 for idx, r in enumerate(deltachain):
2519 if not self.issnapshot(r):
2520 break
2521 deltachain = deltachain[idx:]
2522 chunks = _slicechunk(self, deltachain, deltainfo)
2523 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2524 distance = max(all_span)
2525 else:
2526 distance = deltainfo.distance
2527
2528 textlen = revinfo.textlen
2529 defaultmax = textlen * 4
2530 maxdist = self._maxdeltachainspan
2531 if not maxdist:
2532 maxdist = distance # ensure the conditional pass
2533 maxdist = max(maxdist, defaultmax)
2534 if self._sparserevlog and maxdist < self._srmingapsize:
2535 # In multiple places, we ignore irrelevant data ranges below a
2536 # certain size. We also apply this tradeoff here and relax the
2537 # span constraint for small enough content.
2538 maxdist = self._srmingapsize
2539
2540 # Bad delta from read span:
2541 #
2542 # If the span of data read is larger than the maximum allowed.
2543 if maxdist < distance:
2544 return False
2545
2546 # Bad delta from new delta size:
2547 #
2548 # If the delta size is larger than the target text, storing the
2549 # delta will be inefficient.
2550 if textlen < deltainfo.deltalen:
2551 return False
2552
2553 # Bad delta from cumulated payload size:
2554 #
2555 # If the sum of the deltas gets larger than K * target text length.
2556 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2557 return False
2558
2559 # Bad delta from chain length:
2560 #
2561 # If the number of deltas in the chain gets too high.
2562 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2563 return False
2564
2565 # bad delta from intermediate snapshot size limit
2566 #
2567 # An intermediate snapshot size higher than the limit is bad. The
2568 # limit exists to prevent endless chains of intermediate deltas
2569 # from being created.
2570 if (deltainfo.snapshotdepth is not None and
2571 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
2572 return False
2573
2574 # bad delta if new intermediate snapshot is larger than the previous
2575 # snapshot
2576 if (deltainfo.snapshotdepth
2577 and self.length(deltainfo.base) < deltainfo.deltalen):
2578 return False
2579
2580 return True
2581
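# Editor's note: a toy rendering of the acceptance bounds applied in
# _isgooddeltainfo() above, with invented numbers. 'toyisgooddelta' and its
# parameters are stand-ins; limitdelta2text=2 is assumed here to play the
# role of LIMIT_DELTA2TEXT. The sparse-revlog span refinement is omitted.
def toyisgooddelta(distance, deltalen, chainlen, textlen, compresseddeltalen,
                   maxdeltachainspan=0, maxchainlen=None, limitdelta2text=2):
    maxdist = maxdeltachainspan or distance   # 0 disables the span bound
    maxdist = max(maxdist, textlen * 4)       # always allow 4x the text size
    if maxdist < distance:
        return False                          # read span too large
    if textlen < deltalen:
        return False                          # delta bigger than the text
    if textlen * limitdelta2text < compresseddeltalen:
        return False                          # cumulated payload too large
    if maxchainlen and maxchainlen < chainlen:
        return False                          # chain too long
    return True

assert toyisgooddelta(1000, 10, 3, 500, 40)
assert not toyisgooddelta(99999, 10, 3, 500, 40, maxdeltachainspan=4096)
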
2577 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2582 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2578 cachedelta, ifh, dfh, alwayscache=False,
2583 cachedelta, ifh, dfh, alwayscache=False,
2579 deltacomputer=None):
2584 deltacomputer=None):
2580 """internal function to add revisions to the log
2585 """internal function to add revisions to the log
2581
2586
2582 see addrevision for argument descriptions.
2587 see addrevision for argument descriptions.
2583
2588
2584 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2589 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2585
2590
2586 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2591 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2587 be used.
2592 be used.
2588
2593
2589 invariants:
2594 invariants:
2590 - rawtext is optional (can be None); if not set, cachedelta must be set.
2595 - rawtext is optional (can be None); if not set, cachedelta must be set.
2591 if both are set, they must correspond to each other.
2596 if both are set, they must correspond to each other.
2592 """
2597 """
2593 if node == nullid:
2598 if node == nullid:
2594 raise RevlogError(_("%s: attempt to add null revision") %
2599 raise RevlogError(_("%s: attempt to add null revision") %
2595 (self.indexfile))
2600 (self.indexfile))
2596 if node == wdirid or node in wdirfilenodeids:
2601 if node == wdirid or node in wdirfilenodeids:
2597 raise RevlogError(_("%s: attempt to add wdir revision") %
2602 raise RevlogError(_("%s: attempt to add wdir revision") %
2598 (self.indexfile))
2603 (self.indexfile))
2599
2604
2600 if self._inline:
2605 if self._inline:
2601 fh = ifh
2606 fh = ifh
2602 else:
2607 else:
2603 fh = dfh
2608 fh = dfh
2604
2609
2605 btext = [rawtext]
2610 btext = [rawtext]
2606
2611
2607 curr = len(self)
2612 curr = len(self)
2608 prev = curr - 1
2613 prev = curr - 1
2609 offset = self.end(prev)
2614 offset = self.end(prev)
2610 p1r, p2r = self.rev(p1), self.rev(p2)
2615 p1r, p2r = self.rev(p1), self.rev(p2)
2611
2616
2612 # full versions are inserted when the needed deltas
2617 # full versions are inserted when the needed deltas
2613 # become comparable to the uncompressed text
2618 # become comparable to the uncompressed text
2614 if rawtext is None:
2619 if rawtext is None:
2615 # need rawtext size, before changed by flag processors, which is
2620 # need rawtext size, before changed by flag processors, which is
2616 # the non-raw size. use revlog explicitly to avoid filelog's extra
2621 # the non-raw size. use revlog explicitly to avoid filelog's extra
2617 # logic that might remove metadata size.
2622 # logic that might remove metadata size.
2618 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2623 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2619 cachedelta[1])
2624 cachedelta[1])
2620 else:
2625 else:
2621 textlen = len(rawtext)
2626 textlen = len(rawtext)
2622
2627
2623 if deltacomputer is None:
2628 if deltacomputer is None:
2624 deltacomputer = _deltacomputer(self)
2629 deltacomputer = _deltacomputer(self)
2625
2630
2626 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2631 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2627
2632
2628 # no delta for flag processor revision (see "candelta" for why)
2633 # no delta for flag processor revision (see "candelta" for why)
2629 # not calling candelta since only one revision needs test, also to
2634 # not calling candelta since only one revision needs test, also to
2630 # avoid overhead fetching flags again.
2635 # avoid overhead fetching flags again.
2631 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2636 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2632 deltainfo = None
2637 deltainfo = None
2633 else:
2638 else:
2634 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2639 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2635
2640
2636 if deltainfo is not None:
2641 if deltainfo is not None:
2637 base = deltainfo.base
2642 base = deltainfo.base
2638 chainbase = deltainfo.chainbase
2643 chainbase = deltainfo.chainbase
2639 data = deltainfo.data
2644 data = deltainfo.data
2640 l = deltainfo.deltalen
2645 l = deltainfo.deltalen
2641 else:
2646 else:
2642 rawtext = deltacomputer.buildtext(revinfo, fh)
2647 rawtext = deltacomputer.buildtext(revinfo, fh)
2643 data = self.compress(rawtext)
2648 data = self.compress(rawtext)
2644 l = len(data[1]) + len(data[0])
2649 l = len(data[1]) + len(data[0])
2645 base = chainbase = curr
2650 base = chainbase = curr
2646
2651
2647 e = (offset_type(offset, flags), l, textlen,
2652 e = (offset_type(offset, flags), l, textlen,
2648 base, link, p1r, p2r, node)
2653 base, link, p1r, p2r, node)
2649 self.index.append(e)
2654 self.index.append(e)
2650 self.nodemap[node] = curr
2655 self.nodemap[node] = curr
2651
2656
2652 entry = self._io.packentry(e, self.node, self.version, curr)
2657 entry = self._io.packentry(e, self.node, self.version, curr)
2653 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2658 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2654
2659
2655 if alwayscache and rawtext is None:
2660 if alwayscache and rawtext is None:
2656 rawtext = deltacomputer._buildtext(revinfo, fh)
2661 rawtext = deltacomputer._buildtext(revinfo, fh)
2657
2662
2658 if type(rawtext) == bytes: # only accept immutable objects
2663 if type(rawtext) == bytes: # only accept immutable objects
2659 self._cache = (node, curr, rawtext)
2664 self._cache = (node, curr, rawtext)
2660 self._chainbasecache[curr] = chainbase
2665 self._chainbasecache[curr] = chainbase
2661 return node
2666 return node
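    # A sketch (not part of revlog.py) of how the first index-entry field
    # built above packs the data offset together with the 16-bit flag space;
    # offset_type() and the REVIDX_* flags are defined earlier in this file,
    # and the concrete numbers here are hypothetical:
    #
    #   packed = offset_type(1024, REVIDX_ISCENSORED)
    #   assert packed >> 16 == 1024            # the byte offset
    #   assert packed & 0xFFFF == (1 << 15)    # the censored flag bit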

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
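    # Layout note, restating the two branches above rather than adding new
    # behavior: a non-inline revlog appends index entries to the .i file and
    # compressed chunks to the .d file, while an inline revlog interleaves
    # both in the .i file. That is why the inline branch shifts the data
    # offset by the room occupied by the 'curr' preceding index entries:
    #
    #   physical offset in .i == logical data offset + curr * self._io.size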

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes
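    # Hedged usage sketch for addgroup(); the revlog 'rl', changelog 'cl' and
    # transaction 'tr' are hypothetical, and each element of 'deltas' is the
    # 7-tuple unpacked by the loop above:
    #
    #   def linkmapper(linknode):
    #       return cl.rev(linknode)   # changelog node -> local linkrev
    #
    #   deltas = [(node, p1, p2, linknode, deltabase, delta, 0)]
    #   added = rl.addgroup(deltas, linkmapper, tr,
    #                       addrevisioncb=lambda rl, node: None)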

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add
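    # The delta shape assumed above, as a sketch: a full-replacement delta is
    # a single mdiff patch, a 12-byte ">lll" header (start, end, newlength)
    # followed by the replacement text, and mdiff.replacediffheader() builds
    # that header with start == 0 and end == oldlen. Hypothetical construction:
    #
    #   newtext = "\1\ncensored: reason\1\n"
    #   delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
    #   assert delta[:12] == mdiff.replacediffheader(oldlen, len(newtext))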

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs
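    # Worked example (hypothetical branching revlog): revs 0..4, where revs 2
    # and 3 are both children of rev 1 and rev 4's parent is rev 3, with
    # linkrevs [0, 1, 4, 2, 3]. getstrippoint(3) returns (2, {3}): rev 2
    # (linkrev 4) forces the strip point down to 2, and rev 3 lands in
    # brokenrevs because its own linkrev (2) is below minlink - it is only
    # stripped because it sits above the strip point.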

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None
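    # Truncation offsets used above, as a sketch: with the 64-byte revlogv1
    # index entries (self._io.size), a non-inline revlog cuts the .d file at
    # start(rev) and the .i file at rev * 64, while an inline revlog cuts its
    # single .i file at start(rev) + rev * 64: the surviving data bytes plus
    # the surviving index entries.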

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
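    # Reading the result, under the definitions above: checksize() returns
    # (dd, di), where dd is the number of bytes by which the data file differs
    # from what the last index entry predicts, and di is the number of
    # trailing index bytes that do not form whole entries (for inline revlogs,
    # bytes not accounted for by entries plus chunks). (0, 0) means the sizes
    # are consistent; verification code treats anything else as damage.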

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When it is None, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
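    # Hedged usage sketch for clone(); the repo/transaction wiring and the
    # 'src'/'dest' revlogs are hypothetical, e.g. rewriting a revlog while
    # upgrading a repository's storage format:
    #
    #   with repo.transaction('upgrade') as tr:
    #       src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)
    #
    # DELTAREUSEFULLADD instead routes every revision through the public
    # addrevision() path (applying write-side flag processing), while the
    # other policies call _addrevision() directly, as above.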