revlog: compute snapshot depth on delta info...
Boris Feld
r39190:e0da43e2 default
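
The change below adds a ``snapshotdepth`` field to ``_deltainfo`` and fills it
in from ``_builddeltainfo``: depth 0 when the delta base is ``nullrev`` (a full
snapshot), the length of the base's delta chain when the base is itself a
snapshot of a sparse revlog, and ``None`` for ordinary deltas. A minimal
standalone sketch of that idea follows; ``deltaparent`` and ``issnapshot`` here
are toy stand-ins for illustration, not Mercurial's actual API:

    NULLREV = -1

    def _chain(rev, deltaparent):
        # collect the delta chain leading to ``rev``, base first; this plays
        # the role of revlog._deltachain(rev)[0] in the real code
        revs = []
        while rev != NULLREV:
            revs.append(rev)
            rev = deltaparent(rev)
        revs.reverse()
        return revs

    def snapshotdepth(rev, deltaparent, issnapshot):
        # depth of ``rev`` as a snapshot, or None for a plain delta
        base = deltaparent(rev)
        if base == NULLREV:
            return 0                               # full snapshot
        if issnapshot(rev) and issnapshot(base):
            return len(_chain(base, deltaparent))  # intermediate snapshot
        return None                                # ordinary delta

    # toy chain: rev 0 is a full snapshot, rev 1 an intermediate snapshot
    # built on it, rev 2 a plain delta against rev 1
    parent = {0: NULLREV, 1: 0, 2: 1}.get
    snap = {0, 1}.__contains__
    assert snapshotdepth(0, parent, snap) == 0
    assert snapshotdepth(1, parent, snap) == 1
    assert snapshotdepth(2, parent, snap) is None

Note that nothing in this diff consumes the new field yet; the commit only
records the depth, presumably so that later delta-selection logic can use it.
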
@@ -1,3039 +1,3053 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 from __future__ import absolute_import

 import collections
 import contextlib
 import errno
 import hashlib
 import heapq
 import os
 import re
 import struct
 import zlib

 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullid,
     nullrev,
     wdirfilenodeids,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from .thirdparty import (
     attr,
 )
 from . import (
     ancestor,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )
 from .utils import (
     stringutil,
 )

 parsers = policy.importmod(r'parsers')

 # Aliased for performance.
 _zlibdecompress = zlib.decompress

 # revlog header flags
 REVLOGV0 = 0
 REVLOGV1 = 1
 # Dummy value until file format is finalized.
 # Reminder: change the bounds check in revlog.__init__ when this is changed.
 REVLOGV2 = 0xDEAD
 FLAG_INLINE_DATA = (1 << 16)
 FLAG_GENERALDELTA = (1 << 17)
 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
 REVLOG_DEFAULT_FORMAT = REVLOGV1
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
 REVLOGV2_FLAGS = REVLOGV1_FLAGS

 # revlog index flags
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
 REVIDX_DEFAULT_FLAGS = 0
 # stable order in which flags need to be processed and their processors applied
 REVIDX_FLAGS_ORDER = [
     REVIDX_ISCENSORED,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
 ]
 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
 # bitmark for flags that could cause rawdata content change
 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576

 RevlogError = error.RevlogError
 LookupError = error.LookupError
 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
 CensoredNodeError = error.CensoredNodeError
 ProgrammingError = error.ProgrammingError

 # Store flag processors (cf. 'addflagprocessor()' to register)
 _flagprocessors = {
     REVIDX_ISCENSORED: None,
 }

 _mdre = re.compile('\1\n')
 def parsemeta(text):
     """return (metadatadict, metadatasize)"""
     # text can be buffer, so we can't use .startswith or .index
     if text[:2] != '\1\n':
         return None, None
     s = _mdre.search(text, 2).start()
     mtext = text[2:s]
     meta = {}
     for l in mtext.splitlines():
         k, v = l.split(": ", 1)
         meta[k] = v
     return meta, (s + 2)

 def packmeta(meta, text):
     keys = sorted(meta)
     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
     return "\1\n%s\1\n%s" % (metatext, text)

 def _censoredtext(text):
     m, offs = parsemeta(text)
     return m and "censored" in m

 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.

     Invariant:
     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
       and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
     - Only one flag processor can be registered on a specific flag.
     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
       following signatures:
           - (read)  f(self, rawtext) -> text, bool
           - (write) f(self, text) -> rawtext, bool
           - (raw)   f(self, rawtext) -> bool
       "text" is presented to the user. "rawtext" is stored in revlog data, not
       directly visible to the user.
       The boolean returned by these transforms is used to determine whether
       the returned text can be used for hash integrity checking. For example,
       if "write" returns False, then "text" is used to generate hash. If
       "write" returns True, that basically means "rawtext" returned by "write"
       should be used to generate hash. Usually, "write" and "read" return
       different booleans. And "raw" returns the same boolean as "write".

     Note: The 'raw' transform is used for changegroup generation and in some
     debug commands. In this case the transform only indicates whether the
     contents can be used for hash integrity checks.
     """
     if not flag & REVIDX_KNOWN_FLAGS:
         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
         raise ProgrammingError(msg)
     if flag not in REVIDX_FLAGS_ORDER:
         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
         raise ProgrammingError(msg)
     if flag in _flagprocessors:
         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
         raise error.Abort(msg)
     _flagprocessors[flag] = processor

 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)

 _nullhash = hashlib.sha1(nullid)

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent nodes is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = _nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         if p1 < p2:
             a = p1
             b = p2
         else:
             a = p2
             b = p1
         s = hashlib.sha1(a)
         s.update(b)
     s.update(text)
     return s.digest()

 class _testrevlog(object):
     """minimalist fake revlog to use in doctests"""

     def __init__(self, data, density=0.5, mingap=0):
         """data is a list of revision payload boundaries"""
         self._data = data
         self._srdensitythreshold = density
         self._srmingapsize = mingap

     def start(self, rev):
         if rev == 0:
             return 0
         return self._data[rev - 1]

     def end(self, rev):
         return self._data[rev]

     def length(self, rev):
         return self.end(rev) - self.start(rev)

     def __len__(self):
         return len(self._data)

 def _trimchunk(revlog, revs, startidx, endidx=None):
     """returns revs[startidx:endidx] without empty trailing revs

     Doctest Setup
     >>> revlog = _testrevlog([
     ... 5, #0
     ... 10, #1
     ... 12, #2
     ... 12, #3 (empty)
     ... 17, #4
     ... 21, #5
     ... 21, #6 (empty)
     ... ])

     Contiguous cases:
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
     [0, 1, 2, 3, 4, 5]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
     [0, 1, 2, 3, 4]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
     [0, 1, 2]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
     [2]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
     [3, 4, 5]
     >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
     [3, 4]

     Discontiguous cases:
     >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
     [1, 3, 5]
     >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
     [1]
     >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
     [3, 5]
     >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
     [3, 5]
     """
     length = revlog.length

     if endidx is None:
         endidx = len(revs)

     # If we have a non-empty delta candidate, there is nothing to trim
     if revs[endidx - 1] < len(revlog):
         # Trim empty revs at the end, except the very first revision of a chain
         while (endidx > 1
                and endidx > startidx
                and length(revs[endidx - 1]) == 0):
             endidx -= 1

     return revs[startidx:endidx]

 def _segmentspan(revlog, revs, deltainfo=None):
     """Get the byte span of a segment of revisions

     revs is a sorted array of revision numbers

     >>> revlog = _testrevlog([
     ... 5, #0
     ... 10, #1
     ... 12, #2
     ... 12, #3 (empty)
     ... 17, #4
     ... ])

     >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
     17
     >>> _segmentspan(revlog, [0, 4])
     17
     >>> _segmentspan(revlog, [3, 4])
     5
     >>> _segmentspan(revlog, [1, 2, 3,])
     7
     >>> _segmentspan(revlog, [1, 3])
     7
     """
     if not revs:
         return 0
     if deltainfo is not None and len(revlog) <= revs[-1]:
         if len(revs) == 1:
             return deltainfo.deltalen
         offset = revlog.end(len(revlog) - 1)
         end = deltainfo.deltalen + offset
     else:
         end = revlog.end(revs[-1])
     return end - revlog.start(revs[0])

 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
     """slice revs to reduce the amount of unrelated data to be read from disk.

     ``revs`` is sliced into groups that should be read at once.
     Assume that revs are sorted.

     The initial chunk is sliced until the overall density (payload/chunks-span
     ratio) is above `revlog._srdensitythreshold`. No gap smaller than
     `revlog._srmingapsize` is skipped.

     If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
     For consistency with other slicing choices, this limit won't go lower than
     `revlog._srmingapsize`.

     If individual revision chunks are larger than this limit, they will still
     be yielded individually.

     >>> revlog = _testrevlog([
     ... 5, #00 (5)
     ... 10, #01 (5)
     ... 12, #02 (2)
     ... 12, #03 (empty)
     ... 27, #04 (15)
     ... 31, #05 (4)
     ... 31, #06 (empty)
     ... 42, #07 (11)
     ... 47, #08 (5)
     ... 47, #09 (empty)
     ... 48, #10 (1)
     ... 51, #11 (3)
     ... 74, #12 (23)
     ... 85, #13 (11)
     ... 86, #14 (1)
     ... 91, #15 (5)
     ... ])

     >>> list(_slicechunk(revlog, list(range(16))))
     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
     >>> list(_slicechunk(revlog, [0, 15]))
     [[0], [15]]
     >>> list(_slicechunk(revlog, [0, 11, 15]))
     [[0], [11], [15]]
     >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
     [[0], [11, 13, 15]]
     >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
     [[1, 2], [5, 8, 10, 11], [14]]

     Slicing with a maximum chunk size
     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
     [[0], [11], [13], [15]]
     >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
     [[0], [11], [13, 15]]
     """
     if targetsize is not None:
         targetsize = max(targetsize, revlog._srmingapsize)
     # targetsize should not be specified when evaluating delta candidates:
     # * targetsize is used to ensure we stay within specification when reading,
     # * deltainfo is used to pick a good delta chain when writing.
     if not (deltainfo is None or targetsize is None):
         msg = 'cannot use `targetsize` with a `deltainfo`'
         raise error.ProgrammingError(msg)
     for chunk in _slicechunktodensity(revlog, revs,
                                       deltainfo,
                                       revlog._srdensitythreshold,
                                       revlog._srmingapsize):
         for subchunk in _slicechunktosize(revlog, chunk, targetsize):
             yield subchunk

 def _slicechunktosize(revlog, revs, targetsize=None):
     """slice revs to match the target size

     This is intended to be used on chunks that density slicing selected, but
     that are still too large compared to the read guarantee of revlogs. This
     might happen when the "minimal gap size" interrupted the slicing, or when
     chains are built in a way that creates large blocks next to each other.

     >>> revlog = _testrevlog([
     ... 3, #0 (3)
     ... 5, #1 (2)
     ... 6, #2 (1)
     ... 8, #3 (2)
     ... 8, #4 (empty)
     ... 11, #5 (3)
     ... 12, #6 (1)
     ... 13, #7 (1)
     ... 14, #8 (1)
     ... ])

     Cases where chunk is already small enough
     >>> list(_slicechunktosize(revlog, [0], 3))
     [[0]]
     >>> list(_slicechunktosize(revlog, [6, 7], 3))
     [[6, 7]]
     >>> list(_slicechunktosize(revlog, [0], None))
     [[0]]
     >>> list(_slicechunktosize(revlog, [6, 7], None))
     [[6, 7]]

     cases where we need actual slicing
     >>> list(_slicechunktosize(revlog, [0, 1], 3))
     [[0], [1]]
     >>> list(_slicechunktosize(revlog, [1, 3], 3))
     [[1], [3]]
     >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
     [[1, 2], [3]]
     >>> list(_slicechunktosize(revlog, [3, 5], 3))
     [[3], [5]]
     >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
     [[3], [5]]
     >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
     [[5], [6, 7, 8]]
     >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
     [[0], [1, 2], [3], [5], [6, 7, 8]]

     Case with too large individual chunk (must return valid chunk)
     >>> list(_slicechunktosize(revlog, [0, 1], 2))
     [[0], [1]]
     >>> list(_slicechunktosize(revlog, [1, 3], 1))
     [[1], [3]]
     >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
     [[3], [5]]
     """
     assert targetsize is None or 0 <= targetsize
     if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
         yield revs
         return

     startrevidx = 0
     startdata = revlog.start(revs[0])
     endrevidx = 0
     iterrevs = enumerate(revs)
     next(iterrevs) # skip first rev.
     for idx, r in iterrevs:
         span = revlog.end(r) - startdata
         if span <= targetsize:
             endrevidx = idx
         else:
             chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
             if chunk:
                 yield chunk
             startrevidx = idx
             startdata = revlog.start(r)
             endrevidx = idx
     yield _trimchunk(revlog, revs, startrevidx)

 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                          mingapsize=0):
     """slice revs to reduce the amount of unrelated data to be read from disk.

     ``revs`` is sliced into groups that should be read at once.
     Assume that revs are sorted.

     ``deltainfo`` is a _deltainfo instance of a revision that we would append
     to the top of the revlog.

     The initial chunk is sliced until the overall density (payload/chunks-span
     ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
     skipped.

     >>> revlog = _testrevlog([
     ... 5, #00 (5)
     ... 10, #01 (5)
     ... 12, #02 (2)
     ... 12, #03 (empty)
     ... 27, #04 (15)
     ... 31, #05 (4)
     ... 31, #06 (empty)
     ... 42, #07 (11)
     ... 47, #08 (5)
     ... 47, #09 (empty)
     ... 48, #10 (1)
     ... 51, #11 (3)
     ... 74, #12 (23)
     ... 85, #13 (11)
     ... 86, #14 (1)
     ... 91, #15 (5)
     ... ])

     >>> list(_slicechunktodensity(revlog, list(range(16))))
     [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
     >>> list(_slicechunktodensity(revlog, [0, 15]))
     [[0], [15]]
     >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
     [[0], [11], [15]]
     >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
     [[0], [11, 13, 15]]
     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
     [[1, 2], [5, 8, 10, 11], [14]]
     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
     ...                           mingapsize=20))
     [[1, 2, 3, 5, 8, 10, 11], [14]]
     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
     ...                           targetdensity=0.95))
     [[1, 2], [5], [8, 10, 11], [14]]
     >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
     ...                           targetdensity=0.95, mingapsize=12))
     [[1, 2], [5, 8, 10, 11], [14]]
     """
     start = revlog.start
     length = revlog.length

     if len(revs) <= 1:
         yield revs
         return

     nextrev = len(revlog)
     nextoffset = revlog.end(nextrev - 1)

     if deltainfo is None:
         deltachainspan = _segmentspan(revlog, revs)
         chainpayload = sum(length(r) for r in revs)
     else:
         deltachainspan = deltainfo.distance
         chainpayload = deltainfo.compresseddeltalen

     if deltachainspan < mingapsize:
         yield revs
         return

     readdata = deltachainspan

     if deltachainspan:
         density = chainpayload / float(deltachainspan)
     else:
         density = 1.0

     if density >= targetdensity:
         yield revs
         return

     if deltainfo is not None and deltainfo.deltalen:
         revs = list(revs)
         revs.append(nextrev)

     # Store the gaps in a heap to have them sorted by decreasing size
     gapsheap = []
     heapq.heapify(gapsheap)
     prevend = None
     for i, rev in enumerate(revs):
         if rev < nextrev:
             revstart = start(rev)
             revlen = length(rev)
         else:
             revstart = nextoffset
             revlen = deltainfo.deltalen

         # Skip empty revisions to form larger holes
         if revlen == 0:
             continue

         if prevend is not None:
             gapsize = revstart - prevend
             # only consider holes that are large enough
             if gapsize > mingapsize:
                 heapq.heappush(gapsheap, (-gapsize, i))

         prevend = revstart + revlen

     # Collect the indices of the largest holes until the density is acceptable
     indicesheap = []
     heapq.heapify(indicesheap)
     while gapsheap and density < targetdensity:
         oppgapsize, gapidx = heapq.heappop(gapsheap)

         heapq.heappush(indicesheap, gapidx)

         # the gap sizes are stored as negatives to be sorted decreasingly
         # by the heap
         readdata -= (-oppgapsize)
         if readdata > 0:
             density = chainpayload / float(readdata)
         else:
             density = 1.0

     # Cut the revs at collected indices
     previdx = 0
     while indicesheap:
         idx = heapq.heappop(indicesheap)

         chunk = _trimchunk(revlog, revs, previdx, idx)
         if chunk:
             yield chunk

         previdx = idx

     chunk = _trimchunk(revlog, revs, previdx)
     if chunk:
         yield chunk

 @attr.s(slots=True, frozen=True)
 class _deltainfo(object):
     distance = attr.ib()
     deltalen = attr.ib()
     data = attr.ib()
     base = attr.ib()
     chainbase = attr.ib()
     chainlen = attr.ib()
     compresseddeltalen = attr.ib()
+    snapshotdepth = attr.ib()

 class _deltacomputer(object):
     def __init__(self, revlog):
         self.revlog = revlog

     def _getcandidaterevs(self, p1, p2, cachedelta):
         """
         Provides revisions that are worth diffing against, grouped by level
         of ease.
         """
         revlog = self.revlog
         gdelta = revlog._generaldelta
         curr = len(revlog)
         prev = curr - 1
         p1r, p2r = revlog.rev(p1), revlog.rev(p2)

         # should we try to build a delta?
         if prev != nullrev and revlog.storedeltachains:
             tested = set()
             # This condition is true most of the time when processing
             # changegroup data into a generaldelta repo. The only time it
             # isn't true is if this is the first revision in a delta chain
             # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
             if cachedelta and gdelta and revlog._lazydeltabase:
                 # Assume what we received from the server is a good choice;
                 # build delta will reuse the cache
                 yield (cachedelta[0],)
                 tested.add(cachedelta[0])

             if gdelta:
                 # exclude already lazy tested base if any
                 parents = [p for p in (p1r, p2r)
                            if p != nullrev and p not in tested]

                 if not revlog._deltabothparents and len(parents) == 2:
                     parents.sort()
                     # To minimize the chance of having to build a fulltext,
                     # pick first whichever parent is closest to us (max rev)
                     yield (parents[1],)
                     # then the other one (min rev) if the first did not fit
                     yield (parents[0],)
                     tested.update(parents)
                 elif len(parents) > 0:
                     # Test all parents (1 or 2), and keep the best candidate
                     yield parents
                     tested.update(parents)

             if prev not in tested:
                 # other approaches failed; try against prev to hopefully save
                 # us a fulltext.
                 yield (prev,)
                 tested.add(prev)

     def buildtext(self, revinfo, fh):
         """Builds a fulltext version of a revision

         revinfo: _revisioninfo instance that contains all needed info
         fh:      file handle to either the .i or the .d revlog file,
                  depending on whether it is inlined or not
         """
         btext = revinfo.btext
         if btext[0] is not None:
             return btext[0]

         revlog = self.revlog
         cachedelta = revinfo.cachedelta
         flags = revinfo.flags
         node = revinfo.node

         baserev = cachedelta[0]
         delta = cachedelta[1]
         # special case deltas which replace entire base; no need to decode
         # base revision. this neatly avoids censored bases, which throw when
         # they're decoded.
         hlen = struct.calcsize(">lll")
         if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                    len(delta) - hlen):
             btext[0] = delta[hlen:]
         else:
             # deltabase is rawtext before changed by flag processors, which is
             # equivalent to non-raw text
             basetext = revlog.revision(baserev, _df=fh, raw=False)
             btext[0] = mdiff.patch(basetext, delta)

         try:
             res = revlog._processflags(btext[0], flags, 'read', raw=True)
             btext[0], validatehash = res
             if validatehash:
                 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
             if flags & REVIDX_ISCENSORED:
                 raise RevlogError(_('node %s is not censored') % node)
         except CensoredNodeError:
             # must pass the censored index flag to add censored revisions
             if not flags & REVIDX_ISCENSORED:
                 raise
         return btext[0]

     def _builddeltadiff(self, base, revinfo, fh):
         revlog = self.revlog
         t = self.buildtext(revinfo, fh)
         if revlog.iscensored(base):
             # deltas based on a censored revision must replace the
             # full content in one patch, so delta works everywhere
             header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
             delta = header + t
         else:
             ptext = revlog.revision(base, _df=fh, raw=True)
             delta = mdiff.textdiff(ptext, t)

         return delta

     def _builddeltainfo(self, revinfo, base, fh):
         # can we use the cached delta?
         if revinfo.cachedelta and revinfo.cachedelta[0] == base:
             delta = revinfo.cachedelta[1]
         else:
             delta = self._builddeltadiff(base, revinfo, fh)
         revlog = self.revlog
         header, data = revlog.compress(delta)
         deltalen = len(header) + len(data)
         chainbase = revlog.chainbase(base)
         offset = revlog.end(len(revlog) - 1)
         dist = deltalen + offset - revlog.start(chainbase)
         if revlog._generaldelta:
             deltabase = base
         else:
             deltabase = chainbase
         chainlen, compresseddeltalen = revlog._chaininfo(base)
         chainlen += 1
         compresseddeltalen += deltalen
+
+        revlog = self.revlog
+        snapshotdepth = None
+        if deltabase == nullrev:
+            snapshotdepth = 0
+        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
+            # A delta chain should always be one full snapshot,
+            # zero or more semi-snapshots, and zero or more deltas
+            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
+            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
+                snapshotdepth = len(revlog._deltachain(deltabase)[0])
+
         return _deltainfo(dist, deltalen, (header, data), deltabase,
-                          chainbase, chainlen, compresseddeltalen)
+                          chainbase, chainlen, compresseddeltalen,
+                          snapshotdepth)

     def finddeltainfo(self, revinfo, fh):
         """Find an acceptable delta against a candidate revision

         revinfo: information about the revision (instance of _revisioninfo)
         fh:      file handle to either the .i or the .d revlog file,
                  depending on whether it is inlined or not

         Returns the first acceptable candidate revision, as ordered by
         _getcandidaterevs
         """
         if not revinfo.textlen:
             return None # empty files do not need a delta

         cachedelta = revinfo.cachedelta
         p1 = revinfo.p1
         p2 = revinfo.p2
         revlog = self.revlog

         deltalength = self.revlog.length
         deltaparent = self.revlog.deltaparent

         deltainfo = None
         deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
         for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
             # filter out delta bases that will never produce a good delta
             candidaterevs = [r for r in candidaterevs
                              if self.revlog.length(r) <= deltas_limit]
             nominateddeltas = []
             for candidaterev in candidaterevs:
                 # skip over empty deltas (no need to include them in a chain)
                 while candidaterev != nullrev and not deltalength(candidaterev):
                     candidaterev = deltaparent(candidaterev)
                 # no need to try a delta against nullid, this will be handled
                 # by fulltext later.
                 if candidaterev == nullrev:
                     continue
                 # no delta for rawtext-changing revs (see "candelta" for why)
                 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                     continue
                 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                 if revlog._isgooddeltainfo(candidatedelta, revinfo):
                     nominateddeltas.append(candidatedelta)
             if nominateddeltas:
                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                 break

         return deltainfo

 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node:       expected hash of the revision
     p1, p2:     parent revs of the revision
     btext:      built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags:      flags associated with the revision storage

     One of btext[0] or cachedelta must be set.
     """
     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()

 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack

 class revlogoldindex(list):
     def __getitem__(self, i):
         if i == -1:
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         return list.__getitem__(self, i)

 # maximum <delta-chain-data>/<revision-text-length> ratio
 LIMIT_DELTA2TEXT = 2

 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size

     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         return revlogoldindex(index), nodemap, None

     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise RevlogError(_('index entry flags need revlog version 1'))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return indexformatv0_pack(*e2)

 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack

 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7fffffff

 class revlogio(object):
     def __init__(self):
         self.size = indexformatng.size

     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, getattr(index, 'nodemap', None), cache

     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
         if rev == 0:
             p = versionformat_pack(version) + p[4:]
         return p

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.

     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.

     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.

     If censorable is True, the revlog can have censored revisions.
     """
     def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                  mmaplargeindex=False, censorable=False):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile or (indexfile[:-2] + ".d")
         self.opener = opener
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         self._censorable = censorable
         # 3-tuple of (node, rev, text) for a raw revision.
         self._cache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
948 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
962 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
949 self._chunkcache = (0, '')
963 self._chunkcache = (0, '')
950 # How much data to read and cache into the raw revlog data cache.
964 # How much data to read and cache into the raw revlog data cache.
951 self._chunkcachesize = 65536
965 self._chunkcachesize = 65536
952 self._maxchainlen = None
966 self._maxchainlen = None
953 self._deltabothparents = True
967 self._deltabothparents = True
954 self.index = []
968 self.index = []
955 # Mapping of partial identifiers to full nodes.
969 # Mapping of partial identifiers to full nodes.
956 self._pcache = {}
970 self._pcache = {}
957 # Mapping of revision integer to full node.
971 # Mapping of revision integer to full node.
958 self._nodecache = {nullid: nullrev}
972 self._nodecache = {nullid: nullrev}
959 self._nodepos = None
973 self._nodepos = None
960 self._compengine = 'zlib'
974 self._compengine = 'zlib'
961 self._maxdeltachainspan = -1
975 self._maxdeltachainspan = -1
962 self._withsparseread = False
976 self._withsparseread = False
963 self._sparserevlog = False
977 self._sparserevlog = False
964 self._srdensitythreshold = 0.50
978 self._srdensitythreshold = 0.50
965 self._srmingapsize = 262144
979 self._srmingapsize = 262144
966
980
967 mmapindexthreshold = None
981 mmapindexthreshold = None
968 v = REVLOG_DEFAULT_VERSION
982 v = REVLOG_DEFAULT_VERSION
969 opts = getattr(opener, 'options', None)
983 opts = getattr(opener, 'options', None)
970 if opts is not None:
984 if opts is not None:
971 if 'revlogv2' in opts:
985 if 'revlogv2' in opts:
972 # version 2 revlogs always use generaldelta.
986 # version 2 revlogs always use generaldelta.
973 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
987 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
974 elif 'revlogv1' in opts:
988 elif 'revlogv1' in opts:
975 if 'generaldelta' in opts:
989 if 'generaldelta' in opts:
976 v |= FLAG_GENERALDELTA
990 v |= FLAG_GENERALDELTA
977 else:
991 else:
978 v = 0
992 v = 0
979 if 'chunkcachesize' in opts:
993 if 'chunkcachesize' in opts:
980 self._chunkcachesize = opts['chunkcachesize']
994 self._chunkcachesize = opts['chunkcachesize']
981 if 'maxchainlen' in opts:
995 if 'maxchainlen' in opts:
982 self._maxchainlen = opts['maxchainlen']
996 self._maxchainlen = opts['maxchainlen']
983 if 'deltabothparents' in opts:
997 if 'deltabothparents' in opts:
984 self._deltabothparents = opts['deltabothparents']
998 self._deltabothparents = opts['deltabothparents']
985 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
999 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
986 if 'compengine' in opts:
1000 if 'compengine' in opts:
987 self._compengine = opts['compengine']
1001 self._compengine = opts['compengine']
988 if 'maxdeltachainspan' in opts:
1002 if 'maxdeltachainspan' in opts:
989 self._maxdeltachainspan = opts['maxdeltachainspan']
1003 self._maxdeltachainspan = opts['maxdeltachainspan']
990 if mmaplargeindex and 'mmapindexthreshold' in opts:
1004 if mmaplargeindex and 'mmapindexthreshold' in opts:
991 mmapindexthreshold = opts['mmapindexthreshold']
1005 mmapindexthreshold = opts['mmapindexthreshold']
992 self._sparserevlog = bool(opts.get('sparse-revlog', False))
1006 self._sparserevlog = bool(opts.get('sparse-revlog', False))
993 withsparseread = bool(opts.get('with-sparse-read', False))
1007 withsparseread = bool(opts.get('with-sparse-read', False))
994 # sparse-revlog forces sparse-read
1008 # sparse-revlog forces sparse-read
995 self._withsparseread = self._sparserevlog or withsparseread
1009 self._withsparseread = self._sparserevlog or withsparseread
996 if 'sparse-read-density-threshold' in opts:
1010 if 'sparse-read-density-threshold' in opts:
997 self._srdensitythreshold = opts['sparse-read-density-threshold']
1011 self._srdensitythreshold = opts['sparse-read-density-threshold']
998 if 'sparse-read-min-gap-size' in opts:
1012 if 'sparse-read-min-gap-size' in opts:
999 self._srmingapsize = opts['sparse-read-min-gap-size']
1013 self._srmingapsize = opts['sparse-read-min-gap-size']
1000
1014
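        # note: for a positive integer, x & (x - 1) clears the lowest set
        # bit, so the expression below is zero exactly when x is a power
        # of two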
        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
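    # revs() bounds sketch (hypothetical revlog with 5 revisions):
    #   revs()     -> 0, 1, 2, 3, 4
    #   revs(1, 3) -> 1, 2, 3        (stop is inclusive)
    #   revs(3, 1) -> 3, 2, 1        (start > stop iterates backwards)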

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
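    # Packing illustration (numbers invented): for offset 1234 and flag
    # 0x1 the stored field is (1234 << 16) | 0x0001, so start() recovers
    # 1234 via the right shift and flags() recovers 0x0001 via the mask.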

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
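        # the extra trailing slot absorbs writes for nullrev parents
        # (index -1 is the last element); it starts at 0 and is only
        # ever cleared, so it can never be reported as a head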
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
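        # a parent always has a lower revision number than its children,
        # so a higher-numbered revision can never be an ancestor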
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
1805
1819
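# Hedged sketch of the str(rev) branch above: the "%d" round-trip
# rejects identifiers like "010" or "+1" that parse as an int but are
# not a canonical revision number, and negative values count from the
# tip. The helper name and the use of str (for brevity) are invented.

def _toy_match_strrev(id, nrevs):
    rev = int(id)
    if "%d" % rev != id:
        raise ValueError(id)        # e.g. "010": not canonical
    if rev < 0:
        rev = nrevs + rev           # "-1" addresses the tip revision
    if rev < 0 or rev >= nrevs:
        raise ValueError(id)
    return rev

assert _toy_match_strrev("3", 10) == 3
assert _toy_match_strrev("-1", 10) == 9
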
1806 def _partialmatch(self, id):
1820 def _partialmatch(self, id):
1807 # we don't care wdirfilenodeids as they should be always full hash
1821 # we don't care wdirfilenodeids as they should be always full hash
1808 maybewdir = wdirhex.startswith(id)
1822 maybewdir = wdirhex.startswith(id)
1809 try:
1823 try:
1810 partial = self.index.partialmatch(id)
1824 partial = self.index.partialmatch(id)
1811 if partial and self.hasnode(partial):
1825 if partial and self.hasnode(partial):
1812 if maybewdir:
1826 if maybewdir:
1813 # single 'ff...' match in radix tree, ambiguous with wdir
1827 # single 'ff...' match in radix tree, ambiguous with wdir
1814 raise RevlogError
1828 raise RevlogError
1815 return partial
1829 return partial
1816 if maybewdir:
1830 if maybewdir:
1817 # no 'ff...' match in radix tree, wdir identified
1831 # no 'ff...' match in radix tree, wdir identified
1818 raise error.WdirUnsupported
1832 raise error.WdirUnsupported
1819 return None
1833 return None
1820 except RevlogError:
1834 except RevlogError:
1821 # parsers.c radix tree lookup gave multiple matches
1835 # parsers.c radix tree lookup gave multiple matches
1822 # fast path: for unfiltered changelog, radix tree is accurate
1836 # fast path: for unfiltered changelog, radix tree is accurate
1823 if not getattr(self, 'filteredrevs', None):
1837 if not getattr(self, 'filteredrevs', None):
1824 raise AmbiguousPrefixLookupError(id, self.indexfile,
1838 raise AmbiguousPrefixLookupError(id, self.indexfile,
1825 _('ambiguous identifier'))
1839 _('ambiguous identifier'))
1826 # fall through to slow path that filters hidden revisions
1840 # fall through to slow path that filters hidden revisions
1827 except (AttributeError, ValueError):
1841 except (AttributeError, ValueError):
1828 # we are pure python, or key was too short to search radix tree
1842 # we are pure python, or key was too short to search radix tree
1829 pass
1843 pass
1830
1844
1831 if id in self._pcache:
1845 if id in self._pcache:
1832 return self._pcache[id]
1846 return self._pcache[id]
1833
1847
1834 if len(id) <= 40:
1848 if len(id) <= 40:
1835 try:
1849 try:
1836 # hex(node)[:...]
1850 # hex(node)[:...]
1837 l = len(id) // 2 # grab an even number of digits
1851 l = len(id) // 2 # grab an even number of digits
1838 prefix = bin(id[:l * 2])
1852 prefix = bin(id[:l * 2])
1839 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1853 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1840 nl = [n for n in nl if hex(n).startswith(id) and
1854 nl = [n for n in nl if hex(n).startswith(id) and
1841 self.hasnode(n)]
1855 self.hasnode(n)]
1842 if len(nl) > 0:
1856 if len(nl) > 0:
1843 if len(nl) == 1 and not maybewdir:
1857 if len(nl) == 1 and not maybewdir:
1844 self._pcache[id] = nl[0]
1858 self._pcache[id] = nl[0]
1845 return nl[0]
1859 return nl[0]
1846 raise AmbiguousPrefixLookupError(id, self.indexfile,
1860 raise AmbiguousPrefixLookupError(id, self.indexfile,
1847 _('ambiguous identifier'))
1861 _('ambiguous identifier'))
1848 if maybewdir:
1862 if maybewdir:
1849 raise error.WdirUnsupported
1863 raise error.WdirUnsupported
1850 return None
1864 return None
1851 except TypeError:
1865 except TypeError:
1852 pass
1866 pass
1853
1867
1854 def lookup(self, id):
1868 def lookup(self, id):
1855 """locate a node based on:
1869 """locate a node based on:
1856 - revision number or str(revision number)
1870 - revision number or str(revision number)
1857 - nodeid or subset of hex nodeid
1871 - nodeid or subset of hex nodeid
1858 """
1872 """
1859 n = self._match(id)
1873 n = self._match(id)
1860 if n is not None:
1874 if n is not None:
1861 return n
1875 return n
1862 n = self._partialmatch(id)
1876 n = self._partialmatch(id)
1863 if n:
1877 if n:
1864 return n
1878 return n
1865
1879
1866 raise LookupError(id, self.indexfile, _('no match found'))
1880 raise LookupError(id, self.indexfile, _('no match found'))
1867
1881
1868 def shortest(self, node, minlength=1):
1882 def shortest(self, node, minlength=1):
1869 """Find the shortest unambiguous prefix that matches node."""
1883 """Find the shortest unambiguous prefix that matches node."""
1870 def isvalid(prefix):
1884 def isvalid(prefix):
1871 try:
1885 try:
1872 node = self._partialmatch(prefix)
1886 node = self._partialmatch(prefix)
1873 except error.RevlogError:
1887 except error.RevlogError:
1874 return False
1888 return False
1875 except error.WdirUnsupported:
1889 except error.WdirUnsupported:
1876 # single 'ff...' match
1890 # single 'ff...' match
1877 return True
1891 return True
1878 if node is None:
1892 if node is None:
1879 raise LookupError(node, self.indexfile, _('no node'))
1893 raise LookupError(node, self.indexfile, _('no node'))
1880 return True
1894 return True
1881
1895
1882 def maybewdir(prefix):
1896 def maybewdir(prefix):
1883 return all(c == 'f' for c in prefix)
1897 return all(c == 'f' for c in prefix)
1884
1898
1885 hexnode = hex(node)
1899 hexnode = hex(node)
1886
1900
1887 def disambiguate(hexnode, minlength):
1901 def disambiguate(hexnode, minlength):
1888 """Disambiguate against wdirid."""
1902 """Disambiguate against wdirid."""
1889 for length in range(minlength, 41):
1903 for length in range(minlength, 41):
1890 prefix = hexnode[:length]
1904 prefix = hexnode[:length]
1891 if not maybewdir(prefix):
1905 if not maybewdir(prefix):
1892 return prefix
1906 return prefix
1893
1907
1894 if not getattr(self, 'filteredrevs', None):
1908 if not getattr(self, 'filteredrevs', None):
1895 try:
1909 try:
1896 length = max(self.index.shortest(node), minlength)
1910 length = max(self.index.shortest(node), minlength)
1897 return disambiguate(hexnode, length)
1911 return disambiguate(hexnode, length)
1898 except RevlogError:
1912 except RevlogError:
1899 if node != wdirid:
1913 if node != wdirid:
1900 raise LookupError(node, self.indexfile, _('no node'))
1914 raise LookupError(node, self.indexfile, _('no node'))
1901 except AttributeError:
1915 except AttributeError:
1902 # Fall through to pure code
1916 # Fall through to pure code
1903 pass
1917 pass
1904
1918
1905 if node == wdirid:
1919 if node == wdirid:
1906 for length in range(minlength, 41):
1920 for length in range(minlength, 41):
1907 prefix = hexnode[:length]
1921 prefix = hexnode[:length]
1908 if isvalid(prefix):
1922 if isvalid(prefix):
1909 return prefix
1923 return prefix
1910
1924
1911 for length in range(minlength, 41):
1925 for length in range(minlength, 41):
1912 prefix = hexnode[:length]
1926 prefix = hexnode[:length]
1913 if isvalid(prefix):
1927 if isvalid(prefix):
1914 return disambiguate(hexnode, length)
1928 return disambiguate(hexnode, length)
1915
1929
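# Minimal standalone model of the prefix search performed above, over a
# plain list of hex nodeids instead of the index radix tree; it ignores
# the wdir special case and hidden-revision filtering. Everything here
# is illustrative.

def _toy_shortest(hexnode, allnodes, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if [n for n in allnodes if n.startswith(prefix)] == [hexnode]:
            return prefix
    return hexnode

_nodes = ['ab12ef', 'ab99cd', 'f0f0f0']   # abbreviated fake nodeids
assert _toy_shortest('ab12ef', _nodes) == 'ab1'
assert _toy_shortest('f0f0f0', _nodes) == 'f'
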
1916 def cmp(self, node, text):
1930 def cmp(self, node, text):
1917 """compare text with a given file revision
1931 """compare text with a given file revision
1918
1932
1919 returns True if text is different from what is stored.
1933 returns True if text is different from what is stored.
1920 """
1934 """
1921 p1, p2 = self.parents(node)
1935 p1, p2 = self.parents(node)
1922 return hash(text, p1, p2) != node
1936 return hash(text, p1, p2) != node
1923
1937
1924 def _cachesegment(self, offset, data):
1938 def _cachesegment(self, offset, data):
1925 """Add a segment to the revlog cache.
1939 """Add a segment to the revlog cache.
1926
1940
1927 Accepts an absolute offset and the data that is at that location.
1941 Accepts an absolute offset and the data that is at that location.
1928 """
1942 """
1929 o, d = self._chunkcache
1943 o, d = self._chunkcache
1930 # try to add to existing cache
1944 # try to add to existing cache
1931 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1945 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1932 self._chunkcache = o, d + data
1946 self._chunkcache = o, d + data
1933 else:
1947 else:
1934 self._chunkcache = offset, data
1948 self._chunkcache = offset, data
1935
1949
1936 def _readsegment(self, offset, length, df=None):
1950 def _readsegment(self, offset, length, df=None):
1937 """Load a segment of raw data from the revlog.
1951 """Load a segment of raw data from the revlog.
1938
1952
1939 Accepts an absolute offset, length to read, and an optional existing
1953 Accepts an absolute offset, length to read, and an optional existing
1940 file handle to read from.
1954 file handle to read from.
1941
1955
1942 If an existing file handle is passed, it will be seeked and the
1956 If an existing file handle is passed, it will be seeked and the
1943 original seek position will NOT be restored.
1957 original seek position will NOT be restored.
1944
1958
1945 Returns a str or buffer of raw byte data.
1959 Returns a str or buffer of raw byte data.
1946 """
1960 """
1947 # Cache data both forward and backward around the requested
1961 # Cache data both forward and backward around the requested
1948 # data, in a fixed size window. This helps speed up operations
1962 # data, in a fixed size window. This helps speed up operations
1949 # involving reading the revlog backwards.
1963 # involving reading the revlog backwards.
1950 cachesize = self._chunkcachesize
1964 cachesize = self._chunkcachesize
1951 realoffset = offset & ~(cachesize - 1)
1965 realoffset = offset & ~(cachesize - 1)
1952 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1966 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1953 - realoffset)
1967 - realoffset)
1954 with self._datareadfp(df) as df:
1968 with self._datareadfp(df) as df:
1955 df.seek(realoffset)
1969 df.seek(realoffset)
1956 d = df.read(reallength)
1970 d = df.read(reallength)
1957 self._cachesegment(realoffset, d)
1971 self._cachesegment(realoffset, d)
1958 if offset != realoffset or reallength != length:
1972 if offset != realoffset or reallength != length:
1959 return util.buffer(d, offset - realoffset, length)
1973 return util.buffer(d, offset - realoffset, length)
1960 return d
1974 return d
1961
1975
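# Worked example of the window alignment above, assuming a chunk cache
# size of 64KiB (the values are made up; cachesize must be a power of
# two for the mask arithmetic to hold):

_cachesize = 65536
_offset, _length = 70000, 100
_realoffset = _offset & ~(_cachesize - 1)
_reallength = (((_offset + _length + _cachesize) & ~(_cachesize - 1))
               - _realoffset)
assert _realoffset == 65536 and _reallength == 65536
# the aligned window fully covers the requested [offset, offset+length)
assert _realoffset <= _offset
assert _offset + _length <= _realoffset + _reallength
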
1962 def _getsegment(self, offset, length, df=None):
1976 def _getsegment(self, offset, length, df=None):
1963 """Obtain a segment of raw data from the revlog.
1977 """Obtain a segment of raw data from the revlog.
1964
1978
1965 Accepts an absolute offset, length of bytes to obtain, and an
1979 Accepts an absolute offset, length of bytes to obtain, and an
1966 optional file handle to the already-opened revlog. If the file
1980 optional file handle to the already-opened revlog. If the file
1967 handle is used, its original seek position will not be preserved.
1981 handle is used, its original seek position will not be preserved.
1968
1982
1969 Requests for data may be returned from a cache.
1983 Requests for data may be returned from a cache.
1970
1984
1971 Returns a str or a buffer instance of raw byte data.
1985 Returns a str or a buffer instance of raw byte data.
1972 """
1986 """
1973 o, d = self._chunkcache
1987 o, d = self._chunkcache
1974 l = len(d)
1988 l = len(d)
1975
1989
1976 # is it in the cache?
1990 # is it in the cache?
1977 cachestart = offset - o
1991 cachestart = offset - o
1978 cacheend = cachestart + length
1992 cacheend = cachestart + length
1979 if cachestart >= 0 and cacheend <= l:
1993 if cachestart >= 0 and cacheend <= l:
1980 if cachestart == 0 and cacheend == l:
1994 if cachestart == 0 and cacheend == l:
1981 return d # avoid a copy
1995 return d # avoid a copy
1982 return util.buffer(d, cachestart, cacheend - cachestart)
1996 return util.buffer(d, cachestart, cacheend - cachestart)
1983
1997
1984 return self._readsegment(offset, length, df=df)
1998 return self._readsegment(offset, length, df=df)
1985
1999
1986 def _getsegmentforrevs(self, startrev, endrev, df=None):
2000 def _getsegmentforrevs(self, startrev, endrev, df=None):
1987 """Obtain a segment of raw data corresponding to a range of revisions.
2001 """Obtain a segment of raw data corresponding to a range of revisions.
1988
2002
1989 Accepts the start and end revisions and an optional already-open
2003 Accepts the start and end revisions and an optional already-open
1990 file handle to be used for reading. If the file handle is read, its
2004 file handle to be used for reading. If the file handle is read, its
1991 seek position will not be preserved.
2005 seek position will not be preserved.
1992
2006
1993 Requests for data may be satisfied by a cache.
2007 Requests for data may be satisfied by a cache.
1994
2008
1995 Returns a 2-tuple of (offset, data) for the requested range of
2009 Returns a 2-tuple of (offset, data) for the requested range of
1996 revisions. Offset is the integer offset from the beginning of the
2010 revisions. Offset is the integer offset from the beginning of the
1997 revlog and data is a str or buffer of the raw byte data.
2011 revlog and data is a str or buffer of the raw byte data.
1998
2012
1999 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2013 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2000 to determine where each revision's data begins and ends.
2014 to determine where each revision's data begins and ends.
2001 """
2015 """
2002 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2016 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2003 # (functions are expensive).
2017 # (functions are expensive).
2004 index = self.index
2018 index = self.index
2005 istart = index[startrev]
2019 istart = index[startrev]
2006 start = int(istart[0] >> 16)
2020 start = int(istart[0] >> 16)
2007 if startrev == endrev:
2021 if startrev == endrev:
2008 end = start + istart[1]
2022 end = start + istart[1]
2009 else:
2023 else:
2010 iend = index[endrev]
2024 iend = index[endrev]
2011 end = int(iend[0] >> 16) + iend[1]
2025 end = int(iend[0] >> 16) + iend[1]
2012
2026
2013 if self._inline:
2027 if self._inline:
2014 start += (startrev + 1) * self._io.size
2028 start += (startrev + 1) * self._io.size
2015 end += (endrev + 1) * self._io.size
2029 end += (endrev + 1) * self._io.size
2016 length = end - start
2030 length = end - start
2017
2031
2018 return start, self._getsegment(start, length, df=df)
2032 return start, self._getsegment(start, length, df=df)
2019
2033
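# Quick arithmetic sketch of the inline adjustment above: in an inline
# revlog, each revision's index entry (self._io.size bytes) is
# interleaved with its data, so a raw data offset must skip the index
# entries of revisions 0..rev. The numbers below are invented.

_iosize = 64                     # one v1 index entry
_startrev, _datastart = 2, 300   # raw offset as recorded in the index
_fileoffset = _datastart + (_startrev + 1) * _iosize
assert _fileoffset == 492        # 300 + 3 * 64
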
2020 def _chunk(self, rev, df=None):
2034 def _chunk(self, rev, df=None):
2021 """Obtain a single decompressed chunk for a revision.
2035 """Obtain a single decompressed chunk for a revision.
2022
2036
2023 Accepts an integer revision and an optional already-open file handle
2037 Accepts an integer revision and an optional already-open file handle
2024 to be used for reading. If used, the seek position of the file will not
2038 to be used for reading. If used, the seek position of the file will not
2025 be preserved.
2039 be preserved.
2026
2040
2027 Returns a str holding uncompressed data for the requested revision.
2041 Returns a str holding uncompressed data for the requested revision.
2028 """
2042 """
2029 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2043 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2030
2044
2031 def _chunks(self, revs, df=None, targetsize=None):
2045 def _chunks(self, revs, df=None, targetsize=None):
2032 """Obtain decompressed chunks for the specified revisions.
2046 """Obtain decompressed chunks for the specified revisions.
2033
2047
2034 Accepts an iterable of numeric revisions that are assumed to be in
2048 Accepts an iterable of numeric revisions that are assumed to be in
2035 ascending order. Also accepts an optional already-open file handle
2049 ascending order. Also accepts an optional already-open file handle
2036 to be used for reading. If used, the seek position of the file will
2050 to be used for reading. If used, the seek position of the file will
2037 not be preserved.
2051 not be preserved.
2038
2052
2039 This function is similar to calling ``self._chunk()`` multiple times,
2053 This function is similar to calling ``self._chunk()`` multiple times,
2040 but is faster.
2054 but is faster.
2041
2055
2042 Returns a list with decompressed data for each requested revision.
2056 Returns a list with decompressed data for each requested revision.
2043 """
2057 """
2044 if not revs:
2058 if not revs:
2045 return []
2059 return []
2046 start = self.start
2060 start = self.start
2047 length = self.length
2061 length = self.length
2048 inline = self._inline
2062 inline = self._inline
2049 iosize = self._io.size
2063 iosize = self._io.size
2050 buffer = util.buffer
2064 buffer = util.buffer
2051
2065
2052 l = []
2066 l = []
2053 ladd = l.append
2067 ladd = l.append
2054
2068
2055 if not self._withsparseread:
2069 if not self._withsparseread:
2056 slicedchunks = (revs,)
2070 slicedchunks = (revs,)
2057 else:
2071 else:
2058 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2072 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2059
2073
2060 for revschunk in slicedchunks:
2074 for revschunk in slicedchunks:
2061 firstrev = revschunk[0]
2075 firstrev = revschunk[0]
2062 # Skip trailing revisions with empty diff
2076 # Skip trailing revisions with empty diff
2063 for lastrev in revschunk[::-1]:
2077 for lastrev in revschunk[::-1]:
2064 if length(lastrev) != 0:
2078 if length(lastrev) != 0:
2065 break
2079 break
2066
2080
2067 try:
2081 try:
2068 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2082 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2069 except OverflowError:
2083 except OverflowError:
2070 # issue4215 - we can't cache a run of chunks greater than
2084 # issue4215 - we can't cache a run of chunks greater than
2071 # 2G on Windows
2085 # 2G on Windows
2072 return [self._chunk(rev, df=df) for rev in revschunk]
2086 return [self._chunk(rev, df=df) for rev in revschunk]
2073
2087
2074 decomp = self.decompress
2088 decomp = self.decompress
2075 for rev in revschunk:
2089 for rev in revschunk:
2076 chunkstart = start(rev)
2090 chunkstart = start(rev)
2077 if inline:
2091 if inline:
2078 chunkstart += (rev + 1) * iosize
2092 chunkstart += (rev + 1) * iosize
2079 chunklength = length(rev)
2093 chunklength = length(rev)
2080 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2094 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2081
2095
2082 return l
2096 return l
2083
2097
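# Sketch of the per-revision slicing in the loop above: one contiguous
# segment is read at `offset`, and each revision's chunk is taken as a
# zero-copy view at (chunkstart - offset). All values are invented.

_segment = b'AAABBBBCC'              # pretend it starts at file offset 100
_offset = 100
_start = {7: 100, 8: 103, 9: 107}    # rev -> absolute data start
_len = {7: 3, 8: 4, 9: 2}            # rev -> chunk length

_view = memoryview(_segment)
_chunks = [bytes(_view[_start[r] - _offset:_start[r] - _offset + _len[r]])
           for r in (7, 8, 9)]
assert _chunks == [b'AAA', b'BBBB', b'CC']
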
2084 def _chunkclear(self):
2098 def _chunkclear(self):
2085 """Clear the raw chunk cache."""
2099 """Clear the raw chunk cache."""
2086 self._chunkcache = (0, '')
2100 self._chunkcache = (0, '')
2087
2101
2088 def deltaparent(self, rev):
2102 def deltaparent(self, rev):
2089 """return deltaparent of the given revision"""
2103 """return deltaparent of the given revision"""
2090 base = self.index[rev][3]
2104 base = self.index[rev][3]
2091 if base == rev:
2105 if base == rev:
2092 return nullrev
2106 return nullrev
2093 elif self._generaldelta:
2107 elif self._generaldelta:
2094 return base
2108 return base
2095 else:
2109 else:
2096 return rev - 1
2110 return rev - 1
2097
2111
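# Condensed model of the two storage modes handled above: with
# generaldelta the index records the actual delta base, while the older
# format implicitly deltas against the previous revision. The helper
# and its inputs are illustrative only (-1 stands for nullrev).

def _toy_deltaparent(rev, base, generaldelta):
    if base == rev:
        return -1                       # full version, no delta parent
    return base if generaldelta else rev - 1

assert _toy_deltaparent(5, 5, True) == -1
assert _toy_deltaparent(5, 2, True) == 2    # generaldelta: real base
assert _toy_deltaparent(5, 2, False) == 4   # legacy: previous revision
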
2098 def issnapshot(self, rev):
2112 def issnapshot(self, rev):
2099 """tells whether rev is a snapshot
2113 """tells whether rev is a snapshot
2100 """
2114 """
2101 if rev == nullrev:
2115 if rev == nullrev:
2102 return True
2116 return True
2103 deltap = self.deltaparent(rev)
2117 deltap = self.deltaparent(rev)
2104 if deltap == nullrev:
2118 if deltap == nullrev:
2105 return True
2119 return True
2106 p1, p2 = self.parentrevs(rev)
2120 p1, p2 = self.parentrevs(rev)
2107 if deltap in (p1, p2):
2121 if deltap in (p1, p2):
2108 return False
2122 return False
2109 return self.issnapshot(deltap)
2123 return self.issnapshot(deltap)
2110
2124
2111 def snapshotdepth(self, rev):
2125 def snapshotdepth(self, rev):
2112 """number of snapshot in the chain before this one"""
2126 """number of snapshot in the chain before this one"""
2113 if not self.issnapshot(rev):
2127 if not self.issnapshot(rev):
2114 raise ProgrammingError('revision %d not a snapshot' % rev)
2128 raise ProgrammingError('revision %d not a snapshot' % rev)
2115 return len(self._deltachain(rev)[0]) - 1
2129 return len(self._deltachain(rev)[0]) - 1
2116
2130
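# Standalone sketch of the snapshot logic above: a revision is a
# snapshot when following delta parents that are *not* graph parents
# eventually reaches a full version (delta parent == nullrev). The toy
# tables are invented; -1 stands for nullrev.

_dp = {0: -1, 1: 0, 2: 1}                      # rev -> delta parent
_gp = {0: (-1, -1), 1: (-1, -1), 2: (1, -1)}   # rev -> graph parents

def _toy_issnapshot(rev):
    if rev == -1 or _dp[rev] == -1:
        return True                    # full text: a level-0 snapshot
    if _dp[rev] in _gp[rev]:
        return False                   # ordinary delta against a parent
    return _toy_issnapshot(_dp[rev])   # intermediate snapshot candidate

assert _toy_issnapshot(0)       # stored as a full version
assert _toy_issnapshot(1)       # deltas against rev 0, not its parent
assert not _toy_issnapshot(2)   # deltas against its own parent, rev 1
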
2117 def revdiff(self, rev1, rev2):
2131 def revdiff(self, rev1, rev2):
2118 """return or calculate a delta between two revisions
2132 """return or calculate a delta between two revisions
2119
2133
2120 The delta calculated is in binary form and is intended to be written to
2134 The delta calculated is in binary form and is intended to be written to
2121 revlog data directly. So this function needs raw revision data.
2135 revlog data directly. So this function needs raw revision data.
2122 """
2136 """
2123 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2137 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2124 return bytes(self._chunk(rev2))
2138 return bytes(self._chunk(rev2))
2125
2139
2126 return mdiff.textdiff(self.revision(rev1, raw=True),
2140 return mdiff.textdiff(self.revision(rev1, raw=True),
2127 self.revision(rev2, raw=True))
2141 self.revision(rev2, raw=True))
2128
2142
2129 def revision(self, nodeorrev, _df=None, raw=False):
2143 def revision(self, nodeorrev, _df=None, raw=False):
2130 """return an uncompressed revision of a given node or revision
2144 """return an uncompressed revision of a given node or revision
2131 number.
2145 number.
2132
2146
2133 _df - an existing file handle to read from. (internal-only)
2147 _df - an existing file handle to read from. (internal-only)
2134 raw - an optional argument specifying if the revision data is to be
2148 raw - an optional argument specifying if the revision data is to be
2135 treated as raw data when applying flag transforms. 'raw' should be set
2149 treated as raw data when applying flag transforms. 'raw' should be set
2136 to True when generating changegroups or in debug commands.
2150 to True when generating changegroups or in debug commands.
2137 """
2151 """
2138 if isinstance(nodeorrev, int):
2152 if isinstance(nodeorrev, int):
2139 rev = nodeorrev
2153 rev = nodeorrev
2140 node = self.node(rev)
2154 node = self.node(rev)
2141 else:
2155 else:
2142 node = nodeorrev
2156 node = nodeorrev
2143 rev = None
2157 rev = None
2144
2158
2145 cachedrev = None
2159 cachedrev = None
2146 flags = None
2160 flags = None
2147 rawtext = None
2161 rawtext = None
2148 if node == nullid:
2162 if node == nullid:
2149 return ""
2163 return ""
2150 if self._cache:
2164 if self._cache:
2151 if self._cache[0] == node:
2165 if self._cache[0] == node:
2152 # _cache only stores rawtext
2166 # _cache only stores rawtext
2153 if raw:
2167 if raw:
2154 return self._cache[2]
2168 return self._cache[2]
2155 # duplicated, but good for perf
2169 # duplicated, but good for perf
2156 if rev is None:
2170 if rev is None:
2157 rev = self.rev(node)
2171 rev = self.rev(node)
2158 if flags is None:
2172 if flags is None:
2159 flags = self.flags(rev)
2173 flags = self.flags(rev)
2160 # no extra flags set, no flag processor runs, text = rawtext
2174 # no extra flags set, no flag processor runs, text = rawtext
2161 if flags == REVIDX_DEFAULT_FLAGS:
2175 if flags == REVIDX_DEFAULT_FLAGS:
2162 return self._cache[2]
2176 return self._cache[2]
2163 # rawtext is reusable. need to run flag processor
2177 # rawtext is reusable. need to run flag processor
2164 rawtext = self._cache[2]
2178 rawtext = self._cache[2]
2165
2179
2166 cachedrev = self._cache[1]
2180 cachedrev = self._cache[1]
2167
2181
2168 # look up what we need to read
2182 # look up what we need to read
2169 if rawtext is None:
2183 if rawtext is None:
2170 if rev is None:
2184 if rev is None:
2171 rev = self.rev(node)
2185 rev = self.rev(node)
2172
2186
2173 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2187 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2174 if stopped:
2188 if stopped:
2175 rawtext = self._cache[2]
2189 rawtext = self._cache[2]
2176
2190
2177 # drop cache to save memory
2191 # drop cache to save memory
2178 self._cache = None
2192 self._cache = None
2179
2193
2180 targetsize = None
2194 targetsize = None
2181 rawsize = self.index[rev][2]
2195 rawsize = self.index[rev][2]
2182 if 0 <= rawsize:
2196 if 0 <= rawsize:
2183 targetsize = 4 * rawsize
2197 targetsize = 4 * rawsize
2184
2198
2185 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2199 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2186 if rawtext is None:
2200 if rawtext is None:
2187 rawtext = bytes(bins[0])
2201 rawtext = bytes(bins[0])
2188 bins = bins[1:]
2202 bins = bins[1:]
2189
2203
2190 rawtext = mdiff.patches(rawtext, bins)
2204 rawtext = mdiff.patches(rawtext, bins)
2191 self._cache = (node, rev, rawtext)
2205 self._cache = (node, rev, rawtext)
2192
2206
2193 if flags is None:
2207 if flags is None:
2194 if rev is None:
2208 if rev is None:
2195 rev = self.rev(node)
2209 rev = self.rev(node)
2196 flags = self.flags(rev)
2210 flags = self.flags(rev)
2197
2211
2198 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2212 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2199 if validatehash:
2213 if validatehash:
2200 self.checkhash(text, node, rev=rev)
2214 self.checkhash(text, node, rev=rev)
2201
2215
2202 return text
2216 return text
2203
2217
2204 def hash(self, text, p1, p2):
2218 def hash(self, text, p1, p2):
2205 """Compute a node hash.
2219 """Compute a node hash.
2206
2220
2207 Available as a function so that subclasses can replace the hash
2221 Available as a function so that subclasses can replace the hash
2208 as needed.
2222 as needed.
2209 """
2223 """
2210 return hash(text, p1, p2)
2224 return hash(text, p1, p2)
2211
2225
2212 def _processflags(self, text, flags, operation, raw=False):
2226 def _processflags(self, text, flags, operation, raw=False):
2213 """Inspect revision data flags and applies transforms defined by
2227 """Inspect revision data flags and applies transforms defined by
2214 registered flag processors.
2228 registered flag processors.
2215
2229
2216 ``text`` - the revision data to process
2230 ``text`` - the revision data to process
2217 ``flags`` - the revision flags
2231 ``flags`` - the revision flags
2218 ``operation`` - the operation being performed (read or write)
2232 ``operation`` - the operation being performed (read or write)
2219 ``raw`` - an optional argument describing if the raw transform should be
2233 ``raw`` - an optional argument describing if the raw transform should be
2220 applied.
2234 applied.
2221
2235
2222 This method processes the flags in the order (or reverse order if
2236 This method processes the flags in the order (or reverse order if
2223 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2237 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2224 flag processors registered for present flags. The order of flags defined
2238 flag processors registered for present flags. The order of flags defined
2225 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2239 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2226
2240
2227 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2241 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2228 processed text and ``validatehash`` is a bool indicating whether the
2242 processed text and ``validatehash`` is a bool indicating whether the
2229 returned text should be checked for hash integrity.
2243 returned text should be checked for hash integrity.
2230
2244
2231 Note: If the ``raw`` argument is set, it has precedence over the
2245 Note: If the ``raw`` argument is set, it has precedence over the
2232 operation and will only update the value of ``validatehash``.
2246 operation and will only update the value of ``validatehash``.
2233 """
2247 """
2234 # fast path: no flag processors will run
2248 # fast path: no flag processors will run
2235 if flags == 0:
2249 if flags == 0:
2236 return text, True
2250 return text, True
2237 if operation not in ('read', 'write'):
2251 if operation not in ('read', 'write'):
2238 raise ProgrammingError(_("invalid '%s' operation") % operation)
2252 raise ProgrammingError(_("invalid '%s' operation") % operation)
2239 # Check all flags are known.
2253 # Check all flags are known.
2240 if flags & ~REVIDX_KNOWN_FLAGS:
2254 if flags & ~REVIDX_KNOWN_FLAGS:
2241 raise RevlogError(_("incompatible revision flag '%#x'") %
2255 raise RevlogError(_("incompatible revision flag '%#x'") %
2242 (flags & ~REVIDX_KNOWN_FLAGS))
2256 (flags & ~REVIDX_KNOWN_FLAGS))
2243 validatehash = True
2257 validatehash = True
2244 # Depending on the operation (read or write), the order might be
2258 # Depending on the operation (read or write), the order might be
2245 # reversed due to non-commutative transforms.
2259 # reversed due to non-commutative transforms.
2246 orderedflags = REVIDX_FLAGS_ORDER
2260 orderedflags = REVIDX_FLAGS_ORDER
2247 if operation == 'write':
2261 if operation == 'write':
2248 orderedflags = reversed(orderedflags)
2262 orderedflags = reversed(orderedflags)
2249
2263
2250 for flag in orderedflags:
2264 for flag in orderedflags:
2251 # If a flagprocessor has been registered for a known flag, apply the
2265 # If a flagprocessor has been registered for a known flag, apply the
2252 # related operation transform and update result tuple.
2266 # related operation transform and update result tuple.
2253 if flag & flags:
2267 if flag & flags:
2254 vhash = True
2268 vhash = True
2255
2269
2256 if flag not in _flagprocessors:
2270 if flag not in _flagprocessors:
2257 message = _("missing processor for flag '%#x'") % (flag)
2271 message = _("missing processor for flag '%#x'") % (flag)
2258 raise RevlogError(message)
2272 raise RevlogError(message)
2259
2273
2260 processor = _flagprocessors[flag]
2274 processor = _flagprocessors[flag]
2261 if processor is not None:
2275 if processor is not None:
2262 readtransform, writetransform, rawtransform = processor
2276 readtransform, writetransform, rawtransform = processor
2263
2277
2264 if raw:
2278 if raw:
2265 vhash = rawtransform(self, text)
2279 vhash = rawtransform(self, text)
2266 elif operation == 'read':
2280 elif operation == 'read':
2267 text, vhash = readtransform(self, text)
2281 text, vhash = readtransform(self, text)
2268 else: # write operation
2282 else: # write operation
2269 text, vhash = writetransform(self, text)
2283 text, vhash = writetransform(self, text)
2270 validatehash = validatehash and vhash
2284 validatehash = validatehash and vhash
2271
2285
2272 return text, validatehash
2286 return text, validatehash
2273
2287
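# Minimal model of the ordering rule documented above: transforms run
# in REVIDX_FLAGS_ORDER on read and in reverse on write, so a write
# followed by a read unwinds them like nested function calls. The flags
# and transforms here are invented.

_FLAG_A, _FLAG_B = 1 << 0, 1 << 1
_ORDER = [_FLAG_A, _FLAG_B]
_write = {_FLAG_A: lambda t: b'A[' + t + b']',
          _FLAG_B: lambda t: b'B[' + t + b']'}
_read = {_FLAG_A: lambda t: t[2:-1],
         _FLAG_B: lambda t: t[2:-1]}

def _toy_processflags(text, flags, operation):
    order = _ORDER if operation == 'read' else list(reversed(_ORDER))
    table = _read if operation == 'read' else _write
    for flag in order:
        if flag & flags:
            text = table[flag](text)
    return text

_stored = _toy_processflags(b'hello', _FLAG_A | _FLAG_B, 'write')
assert _stored == b'A[B[hello]]'                  # A wraps last
assert _toy_processflags(_stored, _FLAG_A | _FLAG_B, 'read') == b'hello'
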
2274 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2288 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2275 """Check node hash integrity.
2289 """Check node hash integrity.
2276
2290
2277 Available as a function so that subclasses can extend hash mismatch
2291 Available as a function so that subclasses can extend hash mismatch
2278 behaviors as needed.
2292 behaviors as needed.
2279 """
2293 """
2280 try:
2294 try:
2281 if p1 is None and p2 is None:
2295 if p1 is None and p2 is None:
2282 p1, p2 = self.parents(node)
2296 p1, p2 = self.parents(node)
2283 if node != self.hash(text, p1, p2):
2297 if node != self.hash(text, p1, p2):
2284 revornode = rev
2298 revornode = rev
2285 if revornode is None:
2299 if revornode is None:
2286 revornode = templatefilters.short(hex(node))
2300 revornode = templatefilters.short(hex(node))
2287 raise RevlogError(_("integrity check failed on %s:%s")
2301 raise RevlogError(_("integrity check failed on %s:%s")
2288 % (self.indexfile, pycompat.bytestr(revornode)))
2302 % (self.indexfile, pycompat.bytestr(revornode)))
2289 except RevlogError:
2303 except RevlogError:
2290 if self._censorable and _censoredtext(text):
2304 if self._censorable and _censoredtext(text):
2291 raise error.CensoredNodeError(self.indexfile, node, text)
2305 raise error.CensoredNodeError(self.indexfile, node, text)
2292 raise
2306 raise
2293
2307
2294 def _enforceinlinesize(self, tr, fp=None):
2308 def _enforceinlinesize(self, tr, fp=None):
2295 """Check if the revlog is too big for inline and convert if so.
2309 """Check if the revlog is too big for inline and convert if so.
2296
2310
2297 This should be called after revisions are added to the revlog. If the
2311 This should be called after revisions are added to the revlog. If the
2298 revlog has grown too large to be an inline revlog, it will convert it
2312 revlog has grown too large to be an inline revlog, it will convert it
2299 to use multiple index and data files.
2313 to use multiple index and data files.
2300 """
2314 """
2301 tiprev = len(self) - 1
2315 tiprev = len(self) - 1
2302 if (not self._inline or
2316 if (not self._inline or
2303 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2317 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2304 return
2318 return
2305
2319
2306 trinfo = tr.find(self.indexfile)
2320 trinfo = tr.find(self.indexfile)
2307 if trinfo is None:
2321 if trinfo is None:
2308 raise RevlogError(_("%s not found in the transaction")
2322 raise RevlogError(_("%s not found in the transaction")
2309 % self.indexfile)
2323 % self.indexfile)
2310
2324
2311 trindex = trinfo[2]
2325 trindex = trinfo[2]
2312 if trindex is not None:
2326 if trindex is not None:
2313 dataoff = self.start(trindex)
2327 dataoff = self.start(trindex)
2314 else:
2328 else:
2315 # revlog was stripped at start of transaction, use all leftover data
2329 # revlog was stripped at start of transaction, use all leftover data
2316 trindex = len(self) - 1
2330 trindex = len(self) - 1
2317 dataoff = self.end(tiprev)
2331 dataoff = self.end(tiprev)
2318
2332
2319 tr.add(self.datafile, dataoff)
2333 tr.add(self.datafile, dataoff)
2320
2334
2321 if fp:
2335 if fp:
2322 fp.flush()
2336 fp.flush()
2323 fp.close()
2337 fp.close()
2324
2338
2325 with self._datafp('w') as df:
2339 with self._datafp('w') as df:
2326 for r in self:
2340 for r in self:
2327 df.write(self._getsegmentforrevs(r, r)[1])
2341 df.write(self._getsegmentforrevs(r, r)[1])
2328
2342
2329 with self._indexfp('w') as fp:
2343 with self._indexfp('w') as fp:
2330 self.version &= ~FLAG_INLINE_DATA
2344 self.version &= ~FLAG_INLINE_DATA
2331 self._inline = False
2345 self._inline = False
2332 io = self._io
2346 io = self._io
2333 for i in self:
2347 for i in self:
2334 e = io.packentry(self.index[i], self.node, self.version, i)
2348 e = io.packentry(self.index[i], self.node, self.version, i)
2335 fp.write(e)
2349 fp.write(e)
2336
2350
2337 # the temp file replaces the real index when we exit the context
2351 # the temp file replaces the real index when we exit the context
2338 # manager
2352 # manager
2339
2353
2340 tr.replace(self.indexfile, trindex * self._io.size)
2354 tr.replace(self.indexfile, trindex * self._io.size)
2341 self._chunkclear()
2355 self._chunkclear()
2342
2356
2343 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2357 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2344 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2358 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2345 """add a revision to the log
2359 """add a revision to the log
2346
2360
2347 text - the revision data to add
2361 text - the revision data to add
2348 transaction - the transaction object used for rollback
2362 transaction - the transaction object used for rollback
2349 link - the linkrev data to add
2363 link - the linkrev data to add
2350 p1, p2 - the parent nodeids of the revision
2364 p1, p2 - the parent nodeids of the revision
2351 cachedelta - an optional precomputed delta
2365 cachedelta - an optional precomputed delta
2352 node - nodeid of revision; typically node is not specified, and it is
2366 node - nodeid of revision; typically node is not specified, and it is
2353 computed by default as hash(text, p1, p2), however subclasses might
2367 computed by default as hash(text, p1, p2), however subclasses might
2354 use a different hashing method (and override checkhash() in such a case)
2368 use a different hashing method (and override checkhash() in such a case)
2355 flags - the known flags to set on the revision
2369 flags - the known flags to set on the revision
2356 deltacomputer - an optional _deltacomputer instance shared between
2370 deltacomputer - an optional _deltacomputer instance shared between
2357 multiple calls
2371 multiple calls
2358 """
2372 """
2359 if link == nullrev:
2373 if link == nullrev:
2360 raise RevlogError(_("attempted to add linkrev -1 to %s")
2374 raise RevlogError(_("attempted to add linkrev -1 to %s")
2361 % self.indexfile)
2375 % self.indexfile)
2362
2376
2363 if flags:
2377 if flags:
2364 node = node or self.hash(text, p1, p2)
2378 node = node or self.hash(text, p1, p2)
2365
2379
2366 rawtext, validatehash = self._processflags(text, flags, 'write')
2380 rawtext, validatehash = self._processflags(text, flags, 'write')
2367
2381
2368 # If the flag processor modifies the revision data, ignore any provided
2382 # If the flag processor modifies the revision data, ignore any provided
2369 # cachedelta.
2383 # cachedelta.
2370 if rawtext != text:
2384 if rawtext != text:
2371 cachedelta = None
2385 cachedelta = None
2372
2386
2373 if len(rawtext) > _maxentrysize:
2387 if len(rawtext) > _maxentrysize:
2374 raise RevlogError(
2388 raise RevlogError(
2375 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2389 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2376 % (self.indexfile, len(rawtext)))
2390 % (self.indexfile, len(rawtext)))
2377
2391
2378 node = node or self.hash(rawtext, p1, p2)
2392 node = node or self.hash(rawtext, p1, p2)
2379 if node in self.nodemap:
2393 if node in self.nodemap:
2380 return node
2394 return node
2381
2395
2382 if validatehash:
2396 if validatehash:
2383 self.checkhash(rawtext, node, p1=p1, p2=p2)
2397 self.checkhash(rawtext, node, p1=p1, p2=p2)
2384
2398
2385 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2399 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2386 flags, cachedelta=cachedelta,
2400 flags, cachedelta=cachedelta,
2387 deltacomputer=deltacomputer)
2401 deltacomputer=deltacomputer)
2388
2402
2389 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2403 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2390 cachedelta=None, deltacomputer=None):
2404 cachedelta=None, deltacomputer=None):
2391 """add a raw revision with known flags, node and parents
2405 """add a raw revision with known flags, node and parents
2392 useful when reusing a revision not stored in this revlog (e.g. received
2406 useful when reusing a revision not stored in this revlog (e.g. received
2393 over the wire, or read from an external bundle).
2407 over the wire, or read from an external bundle).
2394 """
2408 """
2395 dfh = None
2409 dfh = None
2396 if not self._inline:
2410 if not self._inline:
2397 dfh = self._datafp("a+")
2411 dfh = self._datafp("a+")
2398 ifh = self._indexfp("a+")
2412 ifh = self._indexfp("a+")
2399 try:
2413 try:
2400 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2414 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2401 flags, cachedelta, ifh, dfh,
2415 flags, cachedelta, ifh, dfh,
2402 deltacomputer=deltacomputer)
2416 deltacomputer=deltacomputer)
2403 finally:
2417 finally:
2404 if dfh:
2418 if dfh:
2405 dfh.close()
2419 dfh.close()
2406 ifh.close()
2420 ifh.close()
2407
2421
2408 def compress(self, data):
2422 def compress(self, data):
2409 """Generate a possibly-compressed representation of data."""
2423 """Generate a possibly-compressed representation of data."""
2410 if not data:
2424 if not data:
2411 return '', data
2425 return '', data
2412
2426
2413 compressed = self._compressor.compress(data)
2427 compressed = self._compressor.compress(data)
2414
2428
2415 if compressed:
2429 if compressed:
2416 # The revlog compressor added the header in the returned data.
2430 # The revlog compressor added the header in the returned data.
2417 return '', compressed
2431 return '', compressed
2418
2432
2419 if data[0:1] == '\0':
2433 if data[0:1] == '\0':
2420 return '', data
2434 return '', data
2421 return 'u', data
2435 return 'u', data
2422
2436
2423 def decompress(self, data):
2437 def decompress(self, data):
2424 """Decompress a revlog chunk.
2438 """Decompress a revlog chunk.
2425
2439
2426 The chunk is expected to begin with a header identifying the
2440 The chunk is expected to begin with a header identifying the
2427 format type so it can be routed to an appropriate decompressor.
2441 format type so it can be routed to an appropriate decompressor.
2428 """
2442 """
2429 if not data:
2443 if not data:
2430 return data
2444 return data
2431
2445
2432 # Revlogs are read much more frequently than they are written and many
2446 # Revlogs are read much more frequently than they are written and many
2433 # chunks only take microseconds to decompress, so performance is
2447 # chunks only take microseconds to decompress, so performance is
2434 # important here.
2448 # important here.
2435 #
2449 #
2436 # We can make a few assumptions about revlogs:
2450 # We can make a few assumptions about revlogs:
2437 #
2451 #
2438 # 1) the majority of chunks will be compressed (as opposed to inline
2452 # 1) the majority of chunks will be compressed (as opposed to inline
2439 # raw data).
2453 # raw data).
2440 # 2) decompressing *any* data will likely be at least 10x slower than
2454 # 2) decompressing *any* data will likely be at least 10x slower than
2441 # returning raw inline data.
2455 # returning raw inline data.
2442 # 3) we want to prioritize common and officially supported compression
2456 # 3) we want to prioritize common and officially supported compression
2443 # engines
2457 # engines
2444 #
2458 #
2445 # It follows that we want to optimize for "decompress compressed data
2459 # It follows that we want to optimize for "decompress compressed data
2446 # when encoded with common and officially supported compression engines"
2460 # when encoded with common and officially supported compression engines"
2447 # case over "raw data" and "data encoded by less common or non-official
2461 # case over "raw data" and "data encoded by less common or non-official
2448 # compression engines." That is why we have the inline lookup first
2462 # compression engines." That is why we have the inline lookup first
2449 # followed by the compengines lookup.
2463 # followed by the compengines lookup.
2450 #
2464 #
2451 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2465 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2452 # compressed chunks. And this matters for changelog and manifest reads.
2466 # compressed chunks. And this matters for changelog and manifest reads.
2453 t = data[0:1]
2467 t = data[0:1]
2454
2468
2455 if t == 'x':
2469 if t == 'x':
2456 try:
2470 try:
2457 return _zlibdecompress(data)
2471 return _zlibdecompress(data)
2458 except zlib.error as e:
2472 except zlib.error as e:
2459 raise RevlogError(_('revlog decompress error: %s') %
2473 raise RevlogError(_('revlog decompress error: %s') %
2460 stringutil.forcebytestr(e))
2474 stringutil.forcebytestr(e))
2461 # '\0' is more common than 'u' so it goes first.
2475 # '\0' is more common than 'u' so it goes first.
2462 elif t == '\0':
2476 elif t == '\0':
2463 return data
2477 return data
2464 elif t == 'u':
2478 elif t == 'u':
2465 return util.buffer(data, 1)
2479 return util.buffer(data, 1)
2466
2480
2467 try:
2481 try:
2468 compressor = self._decompressors[t]
2482 compressor = self._decompressors[t]
2469 except KeyError:
2483 except KeyError:
2470 try:
2484 try:
2471 engine = util.compengines.forrevlogheader(t)
2485 engine = util.compengines.forrevlogheader(t)
2472 compressor = engine.revlogcompressor()
2486 compressor = engine.revlogcompressor()
2473 self._decompressors[t] = compressor
2487 self._decompressors[t] = compressor
2474 except KeyError:
2488 except KeyError:
2475 raise RevlogError(_('unknown compression type %r') % t)
2489 raise RevlogError(_('unknown compression type %r') % t)
2476
2490
2477 return compressor.decompress(data)
2491 return compressor.decompress(data)
2478
2492
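# Hedged roundtrip sketch of the header scheme above, folded into a
# single buffer for brevity (the real compress() returns a separate
# (header, data) 2-tuple): zlib output starts with 'x' and tags itself,
# 'u' marks stored-uncompressed data, and NUL-prefixed raw data needs
# no tag at all.

import zlib

def _toy_compress(data):
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp                     # self-tagging: begins with 'x'
    if data[0:1] == b'\0':
        return data                     # raw data, recognizable by NUL
    return b'u' + data                  # tag incompressible data

def _toy_decompress(data):
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'u':
        return data[1:]
    return data                         # '\0' and friends: raw

_blob = b'a revision body ' * 64
assert _toy_decompress(_toy_compress(_blob)) == _blob
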
2479 def _isgooddeltainfo(self, deltainfo, revinfo):
2493 def _isgooddeltainfo(self, deltainfo, revinfo):
2480 """Returns True if the given delta is good. Good means that it is within
2494 """Returns True if the given delta is good. Good means that it is within
2481 the disk span, disk size, and chain length bounds that we know to be
2495 the disk span, disk size, and chain length bounds that we know to be
2482 performant."""
2496 performant."""
2483 if deltainfo is None:
2497 if deltainfo is None:
2484 return False
2498 return False
2485
2499
2486 # - 'deltainfo.distance' is the distance from the base revision --
2500 # - 'deltainfo.distance' is the distance from the base revision --
2487 # bounding it limits the amount of I/O we need to do.
2501 # bounding it limits the amount of I/O we need to do.
2488 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2502 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2489 # deltas we need to apply -- bounding it limits the amount of CPU
2503 # deltas we need to apply -- bounding it limits the amount of CPU
2490 # we consume.
2504 # we consume.
2491
2505
2492 if self._sparserevlog:
2506 if self._sparserevlog:
2493 # As sparse-read will be used, we can consider that the distance,
2507 # As sparse-read will be used, we can consider that the distance,
2494 # instead of being the span of the whole chunk,
2508 # instead of being the span of the whole chunk,
2495 # is the span of the largest read chunk
2509 # is the span of the largest read chunk
2496 base = deltainfo.base
2510 base = deltainfo.base
2497
2511
2498 if base != nullrev:
2512 if base != nullrev:
2499 deltachain = self._deltachain(base)[0]
2513 deltachain = self._deltachain(base)[0]
2500 else:
2514 else:
2501 deltachain = []
2515 deltachain = []
2502
2516
2503 chunks = _slicechunk(self, deltachain, deltainfo)
2517 chunks = _slicechunk(self, deltachain, deltainfo)
2504 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2518 all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
2505 distance = max(all_span)
2519 distance = max(all_span)
2506 else:
2520 else:
2507 distance = deltainfo.distance
2521 distance = deltainfo.distance
2508
2522
2509 textlen = revinfo.textlen
2523 textlen = revinfo.textlen
2510 defaultmax = textlen * 4
2524 defaultmax = textlen * 4
2511 maxdist = self._maxdeltachainspan
2525 maxdist = self._maxdeltachainspan
2512 if not maxdist:
2526 if not maxdist:
2513 maxdist = distance # ensure the conditional passes
2527 maxdist = distance # ensure the conditional passes
2514 maxdist = max(maxdist, defaultmax)
2528 maxdist = max(maxdist, defaultmax)
2515 if self._sparserevlog and maxdist < self._srmingapsize:
2529 if self._sparserevlog and maxdist < self._srmingapsize:
2516 # In multiple places, we ignore irrelevant data ranges below a
2530 # In multiple places, we ignore irrelevant data ranges below a
2517 # certain size. We also apply this tradeoff here and relax the span
2531 # certain size. We also apply this tradeoff here and relax the span
2518 # constraint for small enough content.
2532 # constraint for small enough content.
2519 maxdist = self._srmingapsize
2533 maxdist = self._srmingapsize
2520
2534
2521 # Bad delta from read span:
2535 # Bad delta from read span:
2522 #
2536 #
2523 # If the span of data read is larger than the maximum allowed.
2537 # If the span of data read is larger than the maximum allowed.
2524 if maxdist < distance:
2538 if maxdist < distance:
2525 return False
2539 return False
2526
2540
2527 # Bad delta from new delta size:
2541 # Bad delta from new delta size:
2528 #
2542 #
2529 # If the delta size is larger than the target text, storing the
2543 # If the delta size is larger than the target text, storing the
2530 # delta will be inefficient.
2544 # delta will be inefficient.
2531 if textlen < deltainfo.deltalen:
2545 if textlen < deltainfo.deltalen:
2532 return False
2546 return False
2533
2547
2534 # Bad delta from cumulated payload size:
2548 # Bad delta from cumulated payload size:
2535 #
2549 #
2536 # If the sum of deltas gets larger than K * the target text length.
2550 # If the sum of deltas gets larger than K * the target text length.
2537 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2551 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
2538 return False
2552 return False
2539
2553
2540 # Bad delta from chain length:
2554 # Bad delta from chain length:
2541 #
2555 #
2542 # If the number of deltas in the chain gets too high.
2556 # If the number of deltas in the chain gets too high.
2543 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2557 if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
2544 return False
2558 return False
2545
2559
2546 return True
2560 return True
2547
2561
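# Condensed standalone model of the four rejection tests above, with
# invented numbers; the real method derives `distance` from the sliced
# read chunks when sparse-revlog is enabled. LIMIT_DELTA2TEXT is
# assumed to match the module-level constant (2 at the time of writing).

_LIMIT_DELTA2TEXT = 2

def _toy_isgooddelta(distance, deltalen, compresseddeltalen, chainlen,
                     textlen, maxdeltachainspan=0, maxchainlen=0):
    maxdist = max(maxdeltachainspan or distance, textlen * 4)
    if maxdist < distance:
        return False                # read span too large
    if textlen < deltalen:
        return False                # delta bigger than the text itself
    if textlen * _LIMIT_DELTA2TEXT < compresseddeltalen:
        return False                # cumulated chain payload too large
    if maxchainlen and maxchainlen < chainlen:
        return False                # delta chain too long
    return True

assert _toy_isgooddelta(100, 10, 50, 3, textlen=40)
assert not _toy_isgooddelta(100, 50, 50, 3, textlen=40)   # delta > text
assert not _toy_isgooddelta(100, 10, 90, 3, textlen=40)   # payload > 2x
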
2548 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2562 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2549 cachedelta, ifh, dfh, alwayscache=False,
2563 cachedelta, ifh, dfh, alwayscache=False,
2550 deltacomputer=None):
2564 deltacomputer=None):
2551 """internal function to add revisions to the log
2565 """internal function to add revisions to the log
2552
2566
2553 see addrevision for argument descriptions.
2567 see addrevision for argument descriptions.
2554
2568
2555 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2569 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2556
2570
2557 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2571 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2558 be used.
2572 be used.
2559
2573
2560 invariants:
2574 invariants:
2561 - rawtext is optional (can be None); if not set, cachedelta must be set.
2575 - rawtext is optional (can be None); if not set, cachedelta must be set.
2562 if both are set, they must correspond to each other.
2576 if both are set, they must correspond to each other.
2563 """
2577 """
2564 if node == nullid:
2578 if node == nullid:
2565 raise RevlogError(_("%s: attempt to add null revision") %
2579 raise RevlogError(_("%s: attempt to add null revision") %
2566 (self.indexfile))
2580 (self.indexfile))
2567 if node == wdirid or node in wdirfilenodeids:
2581 if node == wdirid or node in wdirfilenodeids:
2568 raise RevlogError(_("%s: attempt to add wdir revision") %
2582 raise RevlogError(_("%s: attempt to add wdir revision") %
2569 (self.indexfile))
2583 (self.indexfile))
2570
2584
2571 if self._inline:
2585 if self._inline:
2572 fh = ifh
2586 fh = ifh
2573 else:
2587 else:
2574 fh = dfh
2588 fh = dfh
2575
2589
2576 btext = [rawtext]
2590 btext = [rawtext]
2577
2591
2578 curr = len(self)
2592 curr = len(self)
2579 prev = curr - 1
2593 prev = curr - 1
2580 offset = self.end(prev)
2594 offset = self.end(prev)
2581 p1r, p2r = self.rev(p1), self.rev(p2)
2595 p1r, p2r = self.rev(p1), self.rev(p2)
2582
2596
2583 # full versions are inserted when the needed deltas
2597 # full versions are inserted when the needed deltas
2584 # become comparable to the uncompressed text
2598 # become comparable to the uncompressed text
2585 if rawtext is None:
2599 if rawtext is None:
2586 # need rawtext size, before changed by flag processors, which is
2600 # need rawtext size, before changed by flag processors, which is
2587 # the non-raw size. use revlog explicitly to avoid filelog's extra
2601 # the non-raw size. use revlog explicitly to avoid filelog's extra
2588 # logic that might remove metadata size.
2602 # logic that might remove metadata size.
2589 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2603 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2590 cachedelta[1])
2604 cachedelta[1])
2591 else:
2605 else:
2592 textlen = len(rawtext)
2606 textlen = len(rawtext)
2593
2607
2594 if deltacomputer is None:
2608 if deltacomputer is None:
2595 deltacomputer = _deltacomputer(self)
2609 deltacomputer = _deltacomputer(self)
2596
2610
2597 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2611 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2598
2612
2599 # no delta for flag processor revision (see "candelta" for why)
2613 # no delta for flag processor revision (see "candelta" for why)
2600 # not calling candelta since only one revision needs test, also to
2614 # not calling candelta since only one revision needs test, also to
2601 # avoid overhead fetching flags again.
2615 # avoid overhead fetching flags again.
2602 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2616 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2603 deltainfo = None
2617 deltainfo = None
2604 else:
2618 else:
2605 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2619 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2606
2620
2607 if deltainfo is not None:
2621 if deltainfo is not None:
2608 base = deltainfo.base
2622 base = deltainfo.base
2609 chainbase = deltainfo.chainbase
2623 chainbase = deltainfo.chainbase
2610 data = deltainfo.data
2624 data = deltainfo.data
2611 l = deltainfo.deltalen
2625 l = deltainfo.deltalen
2612 else:
2626 else:
2613 rawtext = deltacomputer.buildtext(revinfo, fh)
2627 rawtext = deltacomputer.buildtext(revinfo, fh)
2614 data = self.compress(rawtext)
2628 data = self.compress(rawtext)
2615 l = len(data[1]) + len(data[0])
2629 l = len(data[1]) + len(data[0])
2616 base = chainbase = curr
2630 base = chainbase = curr
2617
2631
2618 e = (offset_type(offset, flags), l, textlen,
2632 e = (offset_type(offset, flags), l, textlen,
2619 base, link, p1r, p2r, node)
2633 base, link, p1r, p2r, node)
2620 self.index.append(e)
2634 self.index.append(e)
2621 self.nodemap[node] = curr
2635 self.nodemap[node] = curr
2622
2636
2623 entry = self._io.packentry(e, self.node, self.version, curr)
2637 entry = self._io.packentry(e, self.node, self.version, curr)
2624 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2638 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2625
2639
2626 if alwayscache and rawtext is None:
2640 if alwayscache and rawtext is None:
2627 rawtext = deltacomputer.buildtext(revinfo, fh)
2641 rawtext = deltacomputer.buildtext(revinfo, fh)
2628
2642
2629 if type(rawtext) == bytes: # only accept immutable objects
2643 if type(rawtext) == bytes: # only accept immutable objects
2630 self._cache = (node, curr, rawtext)
2644 self._cache = (node, curr, rawtext)
2631 self._chainbasecache[curr] = chainbase
2645 self._chainbasecache[curr] = chainbase
2632 return node
2646 return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to a position before the end. See issue4943
        # for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
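
    # Illustrative sketch (not part of revlog): the a+ seek workaround above
    # in plain Python, with a hypothetical file ``path`` -- after any read on
    # an "a+b" handle, reposition to the end before appending:
    #
    #     with open(path, 'a+b') as fobj:
    #         fobj.seek(0)
    #         header = fobj.read(4)       # reading moves the file position
    #         fobj.seek(0, os.SEEK_END)   # required on some platforms before
    #         fobj.write(b'new entry')    # appending (see issue4943 above)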

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should already be in our log; the rest
        are each against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes
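
    # Illustrative usage sketch (not part of revlog), assuming a hypothetical
    # iterable ``incoming`` of 7-tuples shaped as described above and a
    # changelog ``cl`` whose ``rev()`` maps linknodes to linkrevs:
    #
    #     seen = []
    #     rl.addgroup(incoming, cl.rev, tr,
    #                 addrevisioncb=lambda rl, node: seen.append(node))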

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add
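
    # Illustrative sketch (not part of revlog): the delta shape the heuristic
    # above matches, with hypothetical content. A full-replacement delta is a
    # single patch hunk -- a 12-byte ">lll" header (start, end, new length)
    # covering the whole old text, followed by the replacement text:
    #
    #     import struct
    #     newtext = "\1\ncensored: for moral reasons\n"
    #     delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
    #     # after the header check, delta[12:23] == "\1\ncensored:"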

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
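        #
        # Worked example (hypothetical linear revlog, revs 0..3 with linkrevs
        # [0, 1, 3, 2]): getstrippoint(2) pops rev 3 (linkrev 2) and rev 2
        # (linkrev 3), both >= minlink; rev 1's linkrev is below minlink, so
        # the walk stops and (2, set()) is returned -- truncate at rev 2,
        # with no surviving revision left holding a broken linkrev.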
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
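        # Compares the sizes recorded in the index against the actual sizes
        # of the index and data files on disk. Returns (dd, di): dd is how
        # many bytes the data file deviates from the expected size, di the
        # same for the index file; (0, 0) means the revlog is consistent.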
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added through the regular ``addrevision()``
          code path rather than the lower-level one, rebuilding each revision
          as if it were newly added.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When left as ``None``, the destination revlog keeps its current
        setting.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd