revlog: fix typo in 'buildtext' name...
Boris Feld
r39228:dbc5ead9 default
@@ -1,3076 +1,3076 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import heapq
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .utils import (
    stringutil,
)

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
# Reminder: change the bounds check in revlog.__init__ when this is changed.
REVLOGV2 = 0xDEAD
FLAG_INLINE_DATA = (1 << 16)
FLAG_GENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = REVLOGV1_FLAGS

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

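# Editor's note: a minimal usage sketch (not part of the original module)
# showing how metadata round-trips through packmeta()/parsemeta(); the key
# and payload are made-up examples:
#
#     blob = packmeta({'copy': 'source-file'}, 'file content')
#     meta, offset = parsemeta(blob)
#     assert meta == {'copy': 'source-file'}
#     assert blob[offset:] == 'file content'
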
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans, and "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

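# Editor's note: a hedged sketch (not part of the original module) of
# registering a 3-tuple of transforms on an already-known flag; the identity
# transforms below are illustrative only:
#
#     def _readext(self, rawtext):
#         return rawtext, True   # (text, usable for hash checking)
#     def _writeext(self, text):
#         return text, True      # (rawtext, usable for hash checking)
#     def _rawext(self, rawtext):
#         return True
#     addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
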
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

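# Editor's note: a small sketch (not part of the original module) of how one
# index field packs the byte offset into the high 48 bits and the flags into
# the low 16 bits:
#
#     packed = offset_type(1024, REVIDX_ISCENSORED)
#     assert getoffset(packed) == 1024
#     assert gettype(packed) == REVIDX_ISCENSORED
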
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

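# Editor's note: an illustrative sketch (not part of the original module).
# For a root revision both parents are nullid, so its node is the SHA-1 of
# the two null parents followed by the text:
#
#     import hashlib
#     node = hash('file content', nullid, nullid)
#     assert node == hashlib.sha1(nullid + nullid + 'file content').digest()
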
class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def _segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> _segmentspan(revlog, [0, 4])
    17
    >>> _segmentspan(revlog, [3, 4])
    5
    >>> _segmentspan(revlog, [1, 2, 3,])
    7
    >>> _segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing, or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that are worth diffing against, grouped by
        level of ease.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

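    # Editor's note: an illustrative sketch (not part of the original class)
    # of the "delta replaces entire base" special case above, assuming
    # mdiff's hunk layout of three big-endian int32s (start, end, new
    # length). A hunk that rewrites the whole base carries the fulltext
    # verbatim after the 12-byte header, so the base never needs decoding:
    #
    #     import struct
    #     base, new = 'old fulltext', 'new fulltext'
    #     delta = struct.pack(">lll", 0, len(base), len(new)) + new
    #     hlen = struct.calcsize(">lll")
    #     assert delta[hlen:] == new
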
    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need deltas

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta base that will never produce good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty delta (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

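    # Editor's note: a simplified sketch (not part of the original class) of
    # the search loop above; candidate groups arrive ordered by increasing
    # cost, and the first group containing an acceptable delta wins:
    #
    #     def _pickdelta(groups, build, isgood):
    #         for group in groups:
    #             good = [d for d in map(build, group) if isgood(d)]
    #             if good:
    #                 return min(good, key=lambda d: d.deltalen)
    #         return None
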
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

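# Editor's note: an illustrative sketch (not part of the original module) of
# packing one v0 index entry; the field values are made up:
#
#     entry = indexformatv0_pack(0,          # offset in the data file
#                                11,         # compressed length
#                                0,          # base rev
#                                0,          # link rev
#                                nullid,     # parent 1 nodeid
#                                nullid,     # parent 2 nodeid
#                                'x' * 20)   # this revision's nodeid
#     assert len(entry) == indexformatv0.size == 76
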
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

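# Editor's note: a small illustration (not part of the original module).
# Unlike a plain list, index[-1] is reserved for the null revision rather
# than meaning "last element":
#
#     index = revlogoldindex([(0, 11, 12, 0, 0, -1, -1, 'x' * 20)])
#     assert index[-1] == (0, 0, 0, -1, -1, -1, -1, nullid)
#     assert index[0][1] == 11   # compressed length of rev 0
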
# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

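# Editor's note: an illustrative sketch (not part of the original module).
# The first index entry doubles as the file header: revlogio.packentry()
# below overwrites its leading 4 bytes with the version field, which is
# lossless because rev 0 always starts at offset 0:
#
#     entry = indexformatng_pack(offset_type(0, 0), 11, 12, 0, 0,
#                                -1, -1, 'x' * 20)
#     header = versionformat_pack(REVLOG_DEFAULT_VERSION) + entry[4:]
#     assert versionformat_unpack(header[:4])[0] == REVLOG_DEFAULT_VERSION
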
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

910 class revlog(object):
910 class revlog(object):
911 """
911 """
912 the underlying revision storage object
912 the underlying revision storage object
913
913
914 A revlog consists of two parts, an index and the revision data.
914 A revlog consists of two parts, an index and the revision data.
915
915
916 The index is a file with a fixed record size containing
916 The index is a file with a fixed record size containing
917 information on each revision, including its nodeid (hash), the
917 information on each revision, including its nodeid (hash), the
918 nodeids of its parents, the position and offset of its data within
918 nodeids of its parents, the position and offset of its data within
919 the data file, and the revision it's based on. Finally, each entry
919 the data file, and the revision it's based on. Finally, each entry
920 contains a linkrev entry that can serve as a pointer to external
920 contains a linkrev entry that can serve as a pointer to external
921 data.
921 data.
922
922
923 The revision data itself is a linear collection of data chunks.
923 The revision data itself is a linear collection of data chunks.
924 Each chunk represents a revision and is usually represented as a
924 Each chunk represents a revision and is usually represented as a
925 delta against the previous chunk. To bound lookup time, runs of
925 delta against the previous chunk. To bound lookup time, runs of
926 deltas are limited to about 2 times the length of the original
926 deltas are limited to about 2 times the length of the original
927 version data. This makes retrieval of a version proportional to
927 version data. This makes retrieval of a version proportional to
928 its size, or O(1) relative to the number of revisions.
928 its size, or O(1) relative to the number of revisions.
929
929
930 Both pieces of the revlog are written to in an append-only
930 Both pieces of the revlog are written to in an append-only
931 fashion, which means we never need to rewrite a file to insert or
931 fashion, which means we never need to rewrite a file to insert or
932 remove data, and can use some simple techniques to avoid the need
932 remove data, and can use some simple techniques to avoid the need
933 for locking while reading.
933 for locking while reading.
934
934
935 If checkambig, indexfile is opened with checkambig=True at
935 If checkambig, indexfile is opened with checkambig=True at
936 writing, to avoid file stat ambiguity.
936 writing, to avoid file stat ambiguity.
937
937
938 If mmaplargeindex is True, and an mmapindexthreshold is set, the
938 If mmaplargeindex is True, and an mmapindexthreshold is set, the
939 index will be mmapped rather than read if it is larger than the
939 index will be mmapped rather than read if it is larger than the
940 configured threshold.
940 configured threshold.
941
941
942 If censorable is True, the revlog can have censored revisions.
942 If censorable is True, the revlog can have censored revisions.
943 """
943 """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
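
    # Illustrative sketch, not part of the original source: how the version
    # word parsed above decomposes. A typical modern header combines v1 with
    # the inline-data and generaldelta flags:
    #
    #   v = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA  # 0x00030001
    #   v & 0xFFFF            == REVLOGV1  (format number)
    #   v & FLAG_INLINE_DATA  != 0         (data interleaved in the .i file)
    #   v & FLAG_GENERALDELTA != 0         (deltas may target either parent)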

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
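
    # Illustrative sketch, not part of the original source: revs() treats
    # `stop` as inclusive and can walk in either direction (`rl` is a
    # hypothetical revlog instance):
    #
    #   list(rl.revs(2, 5))  ->  [2, 3, 4, 5]
    #   list(rl.revs(5, 2))  ->  [5, 4, 3, 2]
    #   list(rl.revs(3))     ->  [3, 4, ..., len(rl) - 1]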

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]
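
    # Illustrative sketch, not part of the original source: decomposing a
    # hypothetical packed first entry with the accessors above.
    #
    #   v = (720 << 16) | REVIDX_ISCENSORED  # 0x02d08000
    #   v >> 16     == 720     (byte position of the chunk in the data)
    #   v & 0xFFFF  == 0x8000  (flag bits; here only REVIDX_ISCENSORED)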

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
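
    # Illustrative sketch, not part of the original source: if rev 7 deltas
    # against rev 5 and rev 5 against a full snapshot at rev 2 (whose base is
    # itself), _chaininfo(7) returns
    #
    #   (2, length(7) + length(5) + length(2))
    #
    # i.e. two deltas to apply, plus the on-disk (compressed) bytes of the
    # whole chain including the base text.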

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
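
    # Illustrative sketch, not part of the original source, using the same
    # hypothetical 7 -> 5 -> 2 chain as above (`rl` is a hypothetical
    # revlog instance):
    #
    #   rl._deltachain(7)             ->  ([2, 5, 7], False)
    #   rl._deltachain(7, stoprev=5)  ->  ([7], True)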

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
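
    # Illustrative sketch, not part of the original source: lazyset unions a
    # small mutable set with a lazily-evaluated membership source, so the
    # ancestors of `common` are only walked when actually queried:
    #
    #   s = lazyset({0, 1, 2})  # stands in for self.ancestors(common)
    #   s.add(5)
    #   1 in s and 5 in s       # True
    #   7 in s                  # False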

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
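
    # Illustrative sketch, not part of the original source: in a hypothetical
    # revlog with parent edges 0 <- 1, 1 <- 2 and 1 <- 3, every rev is first
    # marked as a head, then 0 and 1 are cleared as parents, so _headrevs()
    # returns [2, 3]. (The list has one extra slot so that writes to
    # ishead[-1], i.e. nullrev parents, land harmlessly at the end.)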

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
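
    # Illustrative sketch, not part of the original source: with the
    # hypothetical graph 0 <- 1, 1 <- 2 and 1 <- 3 from above,
    #
    #   rl.heads()                  -> nodes of revs [2, 3]
    #   rl.heads(start=rl.node(2))  -> [rl.node(2)]  (only heads above rev 2)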

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
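
    # Note, added for clarity and not part of the original source: the early
    # `a > b` return is sound because revision numbers are a topological
    # order, so an ancestor always has a smaller rev than its descendants.
    # The final membership test works because if a is an ancestor of b, then
    # a is necessarily one of the heads of the common ancestors of (a, b).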

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
1786
1786
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

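    # Usage sketch (editorial, not part of the original source; assumes an
    # already-initialized revlog instance ``rl``):
    #
    #   rl.lookup(0)            # revision number -> binary node
    #   rl.lookup('0')          # str(revision number) works too
    #   rl.lookup('d82ea0a1')   # unambiguous hex prefix of a nodeid
    #
    # An ambiguous prefix raises AmbiguousPrefixLookupError; an unknown
    # identifier raises LookupError.
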
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

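    # Illustrative note (editorial, not part of the original source): for a
    # node whose hex form starts with 'd82ea0a1', ``shortest(node)`` returns
    # the shortest prefix ('d', 'd8', ...) that no other node in the index
    # shares, never shorter than ``minlength`` and never consisting only of
    # 'f' characters, which could be confused with the virtual wdir id.
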
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

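    # Worked example of the window rounding above (editorial, not part of
    # the original source): with a chunk cache size of 65536 bytes, a
    # request for offset=70000, length=1000 gives
    #   realoffset = 70000 & ~65535                     = 65536
    #   reallength = ((71000 + 65536) & ~65535) - 65536 = 65536
    # so one aligned 64KiB window is read and cached, and a util.buffer()
    # view of just the requested 1000 bytes is returned.
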
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

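    # Illustrative note (editorial, not part of the original source): in an
    # inline revlog, index entries and revision data are interleaved in a
    # single file (entry 0, data 0, entry 1, data 1, ...). The data of
    # revision r is therefore preceded by r + 1 fixed-size index entries in
    # addition to all earlier data, which is what the
    # ``(rev + 1) * self._io.size`` adjustment above accounts for.
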
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

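    # Illustrative note (editorial, not part of the original source): with
    # sparse-read enabled, _slicechunk splits the requested revisions into
    # groups so that large gaps of unrelated data between them are skipped
    # rather than read, trading a few extra read() calls for much less I/O
    # on long delta chains.
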
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

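    # Illustrative note (editorial, not part of the original source): with
    # general delta enabled, the base field of the index entry names the
    # delta parent directly; in the legacy layout a delta always applies on
    # top of the previous revision, hence ``rev - 1``. A base equal to the
    # revision itself marks a full snapshot, reported as a delta against
    # nullrev.
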
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

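    # Illustrative note (editorial, not part of the original source): a
    # revision is a snapshot when it is stored as a full text (delta parent
    # is nullrev) or, recursively, as an "intermediate" snapshot: a delta
    # taken against another snapshot that is not one of the revision's own
    # parents. A plain delta against a parent is never a snapshot.
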
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

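    # Reconstruction sketch (editorial, not part of the original source):
    # reading a revision boils down to
    #   chain, stopped = self._deltachain(rev)   # base rev plus deltas
    #   bins = self._chunks(chain)               # decompressed chunks
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    # followed by the flag-processor 'read' transforms and, when requested,
    # a hash check of the result against the stored node.
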
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

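    # Illustrative example (editorial, not part of the original source): a
    # flag processor is registered as a (readtransform, writetransform,
    # rawtransform) triple; for instance the lfs extension registers one for
    # REVIDX_EXTSTORED roughly like:
    #
    #   addflagprocessor(REVIDX_EXTSTORED,
    #                    (readfromstore, writetostore, bypasscheckhash))
    #
    # Read and write transforms return (newtext, validatehash); the raw
    # transform returns only the validatehash boolean, as used above.
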
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile,
                                     pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

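    # Illustrative note (editorial, not part of the original source): the
    # return value is a (header, data) pair that callers concatenate. An
    # empty header means the compressor's own header (e.g. zlib's 'x')
    # already leads the payload, or the data starts with '\0' and can be
    # stored as-is; the 'u' header explicitly marks data stored uncompressed.
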
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

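    # Illustrative summary (editorial, not part of the original source) of
    # the header bytes handled above:
    #   'x'  - zlib-compressed chunk (the zlib stream header itself)
    #   '\0' - raw data that happens to begin with a NUL byte, stored as-is
    #   'u'  - explicitly uncompressed data; the marker byte is stripped
    # Any other leading byte is resolved through util.compengines, for
    # example the zstd engine when the repository was created with a zstd
    # requirement.
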
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            # search for the first non-snapshot revision
            for idx, r in enumerate(deltachain):
                if not self.issnapshot(r):
                    break
            deltachain = deltachain[idx:]
            chunks = _slicechunk(self, deltachain, deltainfo)
            all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
            distance = max(all_span)
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we ignore irrelevant data ranges below a
            # certain size. We also apply this tradeoff here and relax the
            # span constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the
        # delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        # If the sum of deltas gets larger than K * target text length.
        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        # bad delta from intermediate snapshot size limit
        #
        # If an intermediate snapshot size is higher than the limit. The
        # limit exists to prevent endless chains of intermediate deltas from
        # being created.
        if (deltainfo.snapshotdepth is not None and
                (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
            return False

        # bad delta if new intermediate snapshot is larger than the previous
        # snapshot
        if (deltainfo.snapshotdepth
                and self.length(deltainfo.base) < deltainfo.deltalen):
            return False

        return True

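    # Worked example (editorial, not part of the original source; assumes
    # the module-level LIMIT_DELTA2TEXT constant is 2): for a 1000-byte
    # target text, a candidate delta is rejected when the data span to read
    # exceeds max(self._maxdeltachainspan, 4 * 1000) bytes, when the delta
    # itself is larger than 1000 bytes, or when the cumulated deltas to
    # apply exceed 2 * 1000 bytes.
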
2585 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2585 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2586 cachedelta, ifh, dfh, alwayscache=False,
2586 cachedelta, ifh, dfh, alwayscache=False,
2587 deltacomputer=None):
2587 deltacomputer=None):
2588 """internal function to add revisions to the log
2588 """internal function to add revisions to the log
2589
2589
        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a default deltacomputer
        will be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

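    # Illustrative sketch (not part of revlog.py): the two call shapes
    # permitted by the invariant documented above, using hypothetical
    # variables for the node, transaction and file handles. Both forms
    # appear later in this file (addgroup() passes a cached delta,
    # clone() may pass full raw text):
    #
    #   rl._addrevision(node, rawtext, tr, link, p1, p2, flags,
    #                   None, ifh, dfh)              # full text, no delta
    #   rl._addrevision(node, None, tr, link, p1, p2, flags,
    #                   (baserev, delta), ifh, dfh)  # cached delta only
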
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

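    # Illustrative sketch (not part of revlog.py) of the a+ workaround
    # described in _writeentry(), with a hypothetical path; without the
    # explicit seek, some platforms may write at the current read offset
    # instead of appending:
    #
    #   fh = open(path, 'ab+')      # handle may have been read from earlier
    #   fh.seek(0, os.SEEK_END)     # force the position to EOF...
    #   fh.write(entry)             # ...so the write really appends
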
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

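    # Illustrative sketch: each element of the ``deltas`` iterable consumed
    # by addgroup() unpacks as the 7-tuple used above (hypothetical values):
    #
    #   deltas = [
    #       (node, p1, p2, linknode, deltabase, delta, flags),
    #       ...
    #   ]
    #   rl.addgroup(deltas, linkmapper, transaction,
    #               addrevisioncb=lambda rl, node: None)
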
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

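    # Worked illustration of the heuristic above, assuming (as the code
    # does) that mdiff.replacediffheader() packs a single ">lll" patch
    # header covering the whole old text:
    #
    #   hlen = struct.calcsize(">lll")     # 12 bytes: start, end, newlen
    #   header, body = delta[:hlen], delta[hlen:]
    #   # a full-replacement delta whose body starts with "\1\ncensored:"
    #   # is taken to produce a censored revision
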
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

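    # Usage sketch (hypothetical revlog ``rl``): stripping everything whose
    # linkrev is >= 5 requires truncating from ``rev`` onward, and any revs
    # in ``broken`` below that point will be left with dangling linkrevs:
    #
    #   rev, broken = rl.getstrippoint(5)
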
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

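    # Sketch of the truncation offsets computed above for a revlog keeping
    # ``rev`` revisions; a separate data file is cut at start(rev), while an
    # inline revlog interleaves index entries and data in one file:
    #
    #   end = rev * self._io.size                      # separate index file
    #   end = self.start(rev) + rev * self._io.size    # inline index file
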
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

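    # Usage sketch: checksize() reports how many unexpected trailing bytes
    # each file has; (0, 0) means the data and index files have exactly the
    # sizes implied by the index entries:
    #
    #   dd, di = rl.checksize()
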
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If it is None, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
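
    # Usage sketch (hypothetical source/destination revlogs and an open
    # transaction ``tr``): recompute every delta while converting:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)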