##// END OF EJS Templates
sparse-revlog: only refine delta candidates in the sparse case (issue6006)...
Boris Feld -
r40479:bafa1c4b stable
parent child Browse files
Show More
@@ -1,939 +1,944 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import heapq
13 import heapq
14 import struct
14 import struct
15
15
16 # import stuff from node for others to import from revlog
16 # import stuff from node for others to import from revlog
17 from ..node import (
17 from ..node import (
18 nullrev,
18 nullrev,
19 )
19 )
20 from ..i18n import _
20 from ..i18n import _
21
21
22 from .constants import (
22 from .constants import (
23 REVIDX_ISCENSORED,
23 REVIDX_ISCENSORED,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 )
25 )
26
26
27 from ..thirdparty import (
27 from ..thirdparty import (
28 attr,
28 attr,
29 )
29 )
30
30
31 from .. import (
31 from .. import (
32 error,
32 error,
33 mdiff,
33 mdiff,
34 )
34 )
35
35
36 # maximum <delta-chain-data>/<revision-text-length> ratio
36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 LIMIT_DELTA2TEXT = 2
37 LIMIT_DELTA2TEXT = 2
38
38
class _testrevlog(object):
    """Minimalist fake revlog, just enough for the doctests in this module."""

    def __init__(self, data, density=0.5, mingap=0):
        """``data`` is a list of cumulative revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        # revision 0 starts at offset 0; any other revision starts where
        # its predecessor ends
        return 0 if rev == 0 else self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if deltainfo is not None and targetsize is not None:
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    if targetsize is not None:
        # never slice below the minimum gap size we are willing to skip
        targetsize = max(targetsize, revlog._srmingapsize)
    densitychunks = _slicechunktodensity(revlog, revs,
                                         deltainfo,
                                         revlog._srdensitythreshold,
                                         revlog._srmingapsize)
    for chunk in densitychunks:
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        # nothing to do, the whole chunk fits within the limit
        yield revs
        return

    groupstart = 0                     # index of the first rev of the group
    groupend = 0                       # index of the last rev kept so far
    startdata = revlog.start(revs[0])  # on-disk offset of the group start
    for idx in range(1, len(revs)):
        rev = revs[idx]
        span = revlog.end(rev) - startdata
        if span <= targetsize:
            groupend = idx
        else:
            # adding this rev would exceed the target: flush current group
            # and start a new one at this rev
            chunk = _trimchunk(revlog, revs, groupstart, groupend + 1)
            if chunk:
                yield chunk
            groupstart = idx
            groupend = idx
            startdata = revlog.start(rev)
    yield _trimchunk(revlog, revs, groupstart)

206
206
def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    # Figure out the span and payload of the chain under consideration,
    # either from the stored revisions or from the candidate delta.
    if deltainfo is None:
        deltachainspan = segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    density = chainpayload / float(deltachainspan) if deltachainspan else 1.0

    if density >= targetdensity:
        yield revs
        return

    # account for the candidate delta as a virtual revision on top of the
    # existing revlog
    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Gather the gaps between consecutive non-empty revisions; sizes are
    # negated so that the heap pops the largest gap first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            # the virtual candidate revision sits right past the last one
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((-gapsize, i))

        prevend = revstart + revlen

    heapq.heapify(gaps)

    # Drop the largest holes one by one until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        negsize, gapidx = heapq.heappop(gaps)
        selected.append(gapidx)

        # negsize is negative (see above): removing the hole shrinks the
        # amount of data to read
        readdata += negsize
        density = chainpayload / float(readdata) if readdata > 0 else 1.0

    # Cut the revs at the collected indices, in increasing order
    previdx = 0
    for idx in sorted(selected):
        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk
        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

350
350
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    getlength = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If the last rev is a non-empty delta candidate (an index past the end
    # of the revlog), there is nothing to trim.
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, but never trim away the very first
        # revision of a chain.
        while (endidx > max(1, startidx)
               and getlength(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

403
403
def segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        # the last "revision" is a delta candidate not yet stored in the
        # revlog; its data would be appended right past the current end
        if len(revs) == 1:
            return deltainfo.deltalen
        endbyte = revlog.end(len(revlog) - 1) + deltainfo.deltalen
    else:
        endbyte = revlog.end(revs[-1])
    return endbyte - revlog.start(revs[0])

438
438
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    fullreplacement = mdiff.replacediffheader(revlog.rawsize(baserev),
                                              len(delta) - hlen)
    if delta[:hlen] == fullreplacement:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        fulltext, validatehash = revlog._processflags(fulltext, flags,
                                                      'read', raw=True)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext

467
467
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # NOTE: attribute order defines the constructor argument order; do not
    # reorder these fields.
    distance = attr.ib()            # span between the delta base and the tip
    deltalen = attr.ib()            # size of the stored delta
    data = attr.ib()                # the delta payload itself
    base = attr.ib()                # revision the delta applies against
    chainbase = attr.ib()           # first revision of the delta chain
    chainlen = attr.ib()            # number of deltas in the chain
    compresseddeltalen = attr.ib()  # total compressed size of the chain
    snapshotdepth = attr.ib()       # depth if an intermediate snapshot, else None

478
478
479 def isgooddeltainfo(revlog, deltainfo, revinfo):
479 def isgooddeltainfo(revlog, deltainfo, revinfo):
480 """Returns True if the given delta is good. Good means that it is within
480 """Returns True if the given delta is good. Good means that it is within
481 the disk span, disk size, and chain length bounds that we know to be
481 the disk span, disk size, and chain length bounds that we know to be
482 performant."""
482 performant."""
483 if deltainfo is None:
483 if deltainfo is None:
484 return False
484 return False
485
485
486 # - 'deltainfo.distance' is the distance from the base revision --
486 # - 'deltainfo.distance' is the distance from the base revision --
487 # bounding it limits the amount of I/O we need to do.
487 # bounding it limits the amount of I/O we need to do.
488 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
488 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
489 # deltas we need to apply -- bounding it limits the amount of CPU
489 # deltas we need to apply -- bounding it limits the amount of CPU
490 # we consume.
490 # we consume.
491
491
492 if revlog._sparserevlog:
492 if revlog._sparserevlog:
493 # As sparse-read will be used, we can consider that the distance,
493 # As sparse-read will be used, we can consider that the distance,
494 # instead of being the span of the whole chunk,
494 # instead of being the span of the whole chunk,
495 # is the span of the largest read chunk
495 # is the span of the largest read chunk
496 base = deltainfo.base
496 base = deltainfo.base
497
497
498 if base != nullrev:
498 if base != nullrev:
499 deltachain = revlog._deltachain(base)[0]
499 deltachain = revlog._deltachain(base)[0]
500 else:
500 else:
501 deltachain = []
501 deltachain = []
502
502
503 # search for the first non-snapshot revision
503 # search for the first non-snapshot revision
504 for idx, r in enumerate(deltachain):
504 for idx, r in enumerate(deltachain):
505 if not revlog.issnapshot(r):
505 if not revlog.issnapshot(r):
506 break
506 break
507 deltachain = deltachain[idx:]
507 deltachain = deltachain[idx:]
508 chunks = slicechunk(revlog, deltachain, deltainfo)
508 chunks = slicechunk(revlog, deltachain, deltainfo)
509 all_span = [segmentspan(revlog, revs, deltainfo)
509 all_span = [segmentspan(revlog, revs, deltainfo)
510 for revs in chunks]
510 for revs in chunks]
511 distance = max(all_span)
511 distance = max(all_span)
512 else:
512 else:
513 distance = deltainfo.distance
513 distance = deltainfo.distance
514
514
515 textlen = revinfo.textlen
515 textlen = revinfo.textlen
516 defaultmax = textlen * 4
516 defaultmax = textlen * 4
517 maxdist = revlog._maxdeltachainspan
517 maxdist = revlog._maxdeltachainspan
518 if not maxdist:
518 if not maxdist:
519 maxdist = distance # ensure the conditional pass
519 maxdist = distance # ensure the conditional pass
520 maxdist = max(maxdist, defaultmax)
520 maxdist = max(maxdist, defaultmax)
521 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
521 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
522 # In multiple place, we are ignoring irrelevant data range below a
522 # In multiple place, we are ignoring irrelevant data range below a
523 # certain size. Be also apply this tradeoff here and relax span
523 # certain size. Be also apply this tradeoff here and relax span
524 # constraint for small enought content.
524 # constraint for small enought content.
525 maxdist = revlog._srmingapsize
525 maxdist = revlog._srmingapsize
526
526
527 # Bad delta from read span:
527 # Bad delta from read span:
528 #
528 #
529 # If the span of data read is larger than the maximum allowed.
529 # If the span of data read is larger than the maximum allowed.
530 if maxdist < distance:
530 if maxdist < distance:
531 return False
531 return False
532
532
533 # Bad delta from new delta size:
533 # Bad delta from new delta size:
534 #
534 #
535 # If the delta size is larger than the target text, storing the
535 # If the delta size is larger than the target text, storing the
536 # delta will be inefficient.
536 # delta will be inefficient.
537 if textlen < deltainfo.deltalen:
537 if textlen < deltainfo.deltalen:
538 return False
538 return False
539
539
540 # Bad delta from cumulated payload size:
540 # Bad delta from cumulated payload size:
541 #
541 #
542 # If the sum of delta get larger than K * target text length.
542 # If the sum of delta get larger than K * target text length.
543 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
543 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
544 return False
544 return False
545
545
546 # Bad delta from chain length:
546 # Bad delta from chain length:
547 #
547 #
548 # If the number of delta in the chain gets too high.
548 # If the number of delta in the chain gets too high.
549 if (revlog._maxchainlen
549 if (revlog._maxchainlen
550 and revlog._maxchainlen < deltainfo.chainlen):
550 and revlog._maxchainlen < deltainfo.chainlen):
551 return False
551 return False
552
552
553 # bad delta from intermediate snapshot size limit
553 # bad delta from intermediate snapshot size limit
554 #
554 #
555 # If an intermediate snapshot size is higher than the limit. The
555 # If an intermediate snapshot size is higher than the limit. The
556 # limit exist to prevent endless chain of intermediate delta to be
556 # limit exist to prevent endless chain of intermediate delta to be
557 # created.
557 # created.
558 if (deltainfo.snapshotdepth is not None and
558 if (deltainfo.snapshotdepth is not None and
559 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
559 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
560 return False
560 return False
561
561
562 # bad delta if new intermediate snapshot is larger than the previous
562 # bad delta if new intermediate snapshot is larger than the previous
563 # snapshot
563 # snapshot
564 if (deltainfo.snapshotdepth
564 if (deltainfo.snapshotdepth
565 and revlog.length(deltainfo.base) < deltainfo.deltalen):
565 and revlog.length(deltainfo.base) < deltainfo.deltalen):
566 return False
566 return False
567
567
568 return True
568 return True
569
569
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _raw_candidate_groups for details about the
    group order.
    """
    # A delta-less (or empty) revlog cannot use any base at all.
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    length = revlog.length
    parent_of_delta = revlog.deltaparent
    chosen = None

    # cumulated payload above which a base can never yield a good delta
    payload_limit = textlen * LIMIT_DELTA2TEXT

    visited = set([nullrev])
    groups = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        raw_group = groups.send(chosen)
        if raw_group is None:
            break
        usable = []
        for rev in raw_group:
            # walk over empty deltas, they add nothing to a chain
            while not (rev == nullrev or rev in visited or length(rev)):
                visited.add(rev)
                rev = parent_of_delta(rev)
            # ignore bases we already tried
            if rev in visited:
                continue
            visited.add(rev)
            # a base whose own stored delta already exceeds the payload
            # limit can never produce an acceptable delta
            if payload_limit < length(rev):
                continue
            # delta against nullrev is the final fallback, no need to try
            # it here
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            usable.append(rev)
        if usable:
            # XXX: in the sparse revlog case, the group can become large,
            # impacting performance. Some bounding or slicing mechanism
            # would help to reduce this impact.
            chosen = yield tuple(usable)
    yield None
620
620
def _findsnapshots(revlog, cache, start_rev):
    """Collect every snapshot from ``start_rev`` to tip into ``cache``

    ``cache`` maps a delta-parent revision to the list of snapshot
    revisions stored as a delta directly on top of it.
    """
    parent = revlog.deltaparent
    is_snap = revlog.issnapshot
    for rev in revlog.revs(start_rev):
        if is_snap(rev):
            cache[parent(rev)].append(rev)
628
628
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit delta-base candidate groups, refining around an accepted base

    This generator is driven through ``send()``: the consumer sends back
    the base it settled on (or None) and receives the next group to try.
    """
    best = None
    # Start by trying to reuse the delta shipped in the bundle (or coming
    # from the source revlog).
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling source-delta reuse is
    # useful when we want to make sure we recompute "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice;
        # build delta will reuse the cache.
        best = yield (cachedelta[0],)
        if best is not None:
            yield None
            return
    for group in _rawgroups(revlog, p1, p2, cachedelta):
        best = yield group
        if best is not None:
            break

    # Refining the available deltas only makes sense for sparse revlogs.
    if not revlog._sparserevlog:
        yield None
        return

    # if we settled on a refinable base, try to refine it
    if best is not None and best not in (p1, p2) and revlog.issnapshot(best):
        # refine snapshot down
        last = None
        while last != best:
            last = best
            base = revlog.deltaparent(best)
            if base == nullrev:
                break
            best = yield (base,)
        # refine snapshot up
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated"
        # with the one done in `_rawgroups`. Once we start working on
        # performance, we should make the two logics share this
        # computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, best + 1)
        last = None
        while best != last:
            last = best
            children = tuple(sorted(snapshots[best]))
            best = yield children

    # we have found nothing
    yield None
678 yield None
674
679
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical detail.

    The group order aims at providing fast or small candidates first.
    """
    general_delta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if general_delta:
        # exclude already lazily-tested bases, if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev),
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit.
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2) and keep the best candidate.
            yield parents

    if sparse and parents:
        # map: base-rev -> snapshot revs stored directly on top of it
        snapshots = collections.defaultdict(list)
        # See if an existing snapshot in the parents' chains can be used as
        # a base for a new intermediate snapshot.
        #
        # search for snapshots in the parents' delta chains
        # map: snapshot-depth -> set of snapshot revs at that depth
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for depth, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # that we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however
        # we stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated
            # intermediate snapshots that are deeper than the ones from the
            # parents' delta chains.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot bases. We test them
        # from the highest to the lowest level: high level ones are more
        # likely to result in small deltas.
        floor = None
        for depth, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, check
            # whether an existing snapshot, child of a base we consider,
            # would be suitable.
            #
            # This gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # reuse, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the ones in
                # our parents' delta chains. Those created before have
                # fewer chances to be valid bases since our ancestors had
                # to create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parents' delta chains
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parents' chains; search whether any
        # full snapshot emitted since the parents' bases would be a
        # suitable base for an intermediate snapshot.
        #
        # This gives a chance to reuse a delta chain unrelated to the
        # current revision instead of starting our own. Without such reuse,
        # topological branches would keep reopening new full chains,
        # creating more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # Other approaches failed: try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
795 yield (prev,)
791
796
class deltacomputer(object):
    """Compute, evaluate and select deltas for revisions added to a revlog"""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # reuse the cached fulltext when available
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        baserev, delta = revinfo.cachedelta

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw delta text turning ``base`` into the new revision"""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # Deltas based on a censored revision must replace the full
            # content in one patch, so the delta works everywhere.
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            return header + t
        ptext = revlog.revision(base, _df=fh, raw=True)
        return mdiff.textdiff(ptext, t)

    def _builddeltainfo(self, revinfo, base, fh):
        """Build the _deltainfo for storing ``revinfo`` as a delta of ``base``"""
        revlog = self.revlog
        # can we reuse the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # Check whether the cached diff still applies: walk over empty
            # deltas between the cached base and the candidate base.
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = cachediff
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas.
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build the _deltainfo for storing ``revinfo`` as a full snapshot"""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        # a full snapshot is its own base and chain start
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # No delta for flag-processor revisions (see "candelta" for why).
        # Not calling candelta since only one revision needs testing, and
        # to avoid the overhead of fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        revlog = self.revlog
        p1r = revlog.rev(revinfo.p1)
        p2r = revlog.rev(revinfo.p2)

        deltainfo = None
        groups = _candidategroups(revlog, revinfo.textlen,
                                  p1r, p2r, revinfo.cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # we already found a good delta: challenge it against the
                # refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev,
                                                      fh)
                if isgooddeltainfo(revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now