upstream/mercurial-mirror Commit - r40745:39d29542

1

# revlogdeltas.py - Logic around delta computation for revlog

1

# revlogdeltas.py - Logic around delta computation for revlog

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

"""Helper class to compute deltas stored inside revlogs"""

8

"""Helper class to compute deltas stored inside revlogs"""

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import collections

12

import collections

13

import struct

13

import struct

14

15

# import stuff from node for others to import from revlog

15

# import stuff from node for others to import from revlog

16

from ..node import (

16

from ..node import (

17

nullrev,

17

nullrev,

18

)

18

)

19

from ..i18n import _

19

from ..i18n import _

20

21

from .constants import (

21

from .constants import (

22

REVIDX_ISCENSORED,

22

REVIDX_ISCENSORED,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

24

)

24

)

25

26

from ..thirdparty import (

26

from ..thirdparty import (

27

attr,

27

attr,

28

)

28

)

29

30

from .. import (

30

from .. import (

31

error,

31

error,

32

mdiff,

32

mdiff,

33

)

33

)

34

35

# maximum <delta-chain-data>/<revision-text-length> ratio

35

# maximum <delta-chain-data>/<revision-text-length> ratio

36

LIMIT_DELTA2TEXT = 2

36

LIMIT_DELTA2TEXT = 2

37

38

class _testrevlog(object):

38

class _testrevlog(object):

39

"""minimalist fake revlog to use in doctests"""

39

"""minimalist fake revlog to use in doctests"""

40

41

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

41

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

42

"""data is an list of revision payload boundaries"""

42

"""data is an list of revision payload boundaries"""

43

self._data = data

43

self._data = data

44

self._srdensitythreshold = density

44

self._srdensitythreshold = density

45

self._srmingapsize = mingap

45

self._srmingapsize = mingap

46

self._snapshot = set(snapshot)

46

self._snapshot = set(snapshot)

47

self.index = None

47

48

def start(self, rev):

49

def start(self, rev):

49

if rev == 0:

50

if rev == 0:

50

return 0

51

return 0

51

return self._data[rev - 1]

52

return self._data[rev - 1]

52

53

def end(self, rev):

54

def end(self, rev):

54

return self._data[rev]

55

return self._data[rev]

55

56

def length(self, rev):

57

def length(self, rev):

57

return self.end(rev) - self.start(rev)

58

return self.end(rev) - self.start(rev)

58

59

def __len__(self):

60

def __len__(self):

60

return len(self._data)

61

return len(self._data)

61

62

def issnapshot(self, rev):

63

def issnapshot(self, rev):

63

return rev in self._snapshot

64

return rev in self._snapshot

64

65

def slicechunk(revlog, revs, targetsize=None):

66

def slicechunk(revlog, revs, targetsize=None):

66

"""slice revs to reduce the amount of unrelated data to be read from disk.

67

"""slice revs to reduce the amount of unrelated data to be read from disk.

67

68

``revs`` is sliced into groups that should be read in one time.

69

``revs`` is sliced into groups that should be read in one time.

69

Assume that revs are sorted.

70

Assume that revs are sorted.

70

71

The initial chunk is sliced until the overall density (payload/chunks-span

72

The initial chunk is sliced until the overall density (payload/chunks-span

72

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

73

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

73

`revlog._srmingapsize` is skipped.

74

`revlog._srmingapsize` is skipped.

74

75

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

76

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

76

For consistency with other slicing choice, this limit won't go lower than

77

For consistency with other slicing choice, this limit won't go lower than

77

`revlog._srmingapsize`.

78

`revlog._srmingapsize`.

78

79

If individual revisions chunk are larger than this limit, they will still

80

If individual revisions chunk are larger than this limit, they will still

80

be raised individually.

81

be raised individually.

81

82

>>> data = [

83

>>> data = [

83

... 5, #00 (5)

84

... 5, #00 (5)

84

... 10, #01 (5)

85

... 10, #01 (5)

85

... 12, #02 (2)

86

... 12, #02 (2)

86

... 12, #03 (empty)

87

... 12, #03 (empty)

87

... 27, #04 (15)

88

... 27, #04 (15)

88

... 31, #05 (4)

89

... 31, #05 (4)

89

... 31, #06 (empty)

90

... 31, #06 (empty)

90

... 42, #07 (11)

91

... 42, #07 (11)

91

... 47, #08 (5)

92

... 47, #08 (5)

92

... 47, #09 (empty)

93

... 47, #09 (empty)

93

... 48, #10 (1)

94

... 48, #10 (1)

94

... 51, #11 (3)

95

... 51, #11 (3)

95

... 74, #12 (23)

96

... 74, #12 (23)

96

... 85, #13 (11)

97

... 85, #13 (11)

97

... 86, #14 (1)

98

... 86, #14 (1)

98

... 91, #15 (5)

99

... 91, #15 (5)

99

... ]

100

... ]

100

>>> revlog = _testrevlog(data, snapshot=range(16))

101

>>> revlog = _testrevlog(data, snapshot=range(16))

101

102

>>> list(slicechunk(revlog, list(range(16))))

103

>>> list(slicechunk(revlog, list(range(16))))

103

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

104

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

104

>>> list(slicechunk(revlog, [0, 15]))

105

>>> list(slicechunk(revlog, [0, 15]))

105

[[0], [15]]

106

[[0], [15]]

106

>>> list(slicechunk(revlog, [0, 11, 15]))

107

>>> list(slicechunk(revlog, [0, 11, 15]))

107

[[0], [11], [15]]

108

[[0], [11], [15]]

108

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

109

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

109

[[0], [11, 13, 15]]

110

[[0], [11, 13, 15]]

110

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

111

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

111

[[1, 2], [5, 8, 10, 11], [14]]

112

[[1, 2], [5, 8, 10, 11], [14]]

112

113

Slicing with a maximum chunk size

114

Slicing with a maximum chunk size

114

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

115

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

115

[[0], [11], [13], [15]]

116

[[0], [11], [13], [15]]

116

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

117

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

117

[[0], [11], [13, 15]]

118

[[0], [11], [13, 15]]

118

"""

119

"""

119

if targetsize is not None:

120

if targetsize is not None:

120

targetsize = max(targetsize, revlog._srmingapsize)

121

targetsize = max(targetsize, revlog._srmingapsize)

121

# targetsize should not be specified when evaluating delta candidates:

122

# targetsize should not be specified when evaluating delta candidates:

122

# * targetsize is used to ensure we stay within specification when reading,

123

# * targetsize is used to ensure we stay within specification when reading,

123

for chunk in _slicechunktodensity(revlog, revs,

124

densityslicing = getattr(revlog.index, 'slicechunktodensity', None)

124

revlog._srdensitythreshold,

125

if densityslicing is None:

125

revlog._srmingapsize):

126

densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)

127

for chunk in densityslicing(revs,

128

revlog._srdensitythreshold,

129

revlog._srmingapsize):

126

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

130

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

127

yield subchunk

131

yield subchunk

128

132

129

def _slicechunktosize(revlog, revs, targetsize=None):

133

def _slicechunktosize(revlog, revs, targetsize=None):

130

"""slice revs to match the target size

134

"""slice revs to match the target size

131

135

132

This is intended to be used on chunk that density slicing selected by that

136

This is intended to be used on chunk that density slicing selected by that

133

are still too large compared to the read garantee of revlog. This might

137

are still too large compared to the read garantee of revlog. This might

134

happens when "minimal gap size" interrupted the slicing or when chain are

138

happens when "minimal gap size" interrupted the slicing or when chain are

135

built in a way that create large blocks next to each other.

139

built in a way that create large blocks next to each other.

136

140

137

>>> data = [

141

>>> data = [

138

... 3, #0 (3)

142

... 3, #0 (3)

139

... 5, #1 (2)

143

... 5, #1 (2)

140

... 6, #2 (1)

144

... 6, #2 (1)

141

... 8, #3 (2)

145

... 8, #3 (2)

142

... 8, #4 (empty)

146

... 8, #4 (empty)

143

... 11, #5 (3)

147

... 11, #5 (3)

144

... 12, #6 (1)

148

... 12, #6 (1)

145

... 13, #7 (1)

149

... 13, #7 (1)

146

... 14, #8 (1)

150

... 14, #8 (1)

147

... ]

151

... ]

148

152

149

== All snapshots cases ==

153

== All snapshots cases ==

150

>>> revlog = _testrevlog(data, snapshot=range(9))

154

>>> revlog = _testrevlog(data, snapshot=range(9))

151

155

152

Cases where chunk is already small enough

156

Cases where chunk is already small enough

153

>>> list(_slicechunktosize(revlog, [0], 3))

157

>>> list(_slicechunktosize(revlog, [0], 3))

154

[[0]]

158

[[0]]

155

>>> list(_slicechunktosize(revlog, [6, 7], 3))

159

>>> list(_slicechunktosize(revlog, [6, 7], 3))

156

[[6, 7]]

160

[[6, 7]]

157

>>> list(_slicechunktosize(revlog, [0], None))

161

>>> list(_slicechunktosize(revlog, [0], None))

158

[[0]]

162

[[0]]

159

>>> list(_slicechunktosize(revlog, [6, 7], None))

163

>>> list(_slicechunktosize(revlog, [6, 7], None))

160

[[6, 7]]

164

[[6, 7]]

161

165

162

cases where we need actual slicing

166

cases where we need actual slicing

163

>>> list(_slicechunktosize(revlog, [0, 1], 3))

167

>>> list(_slicechunktosize(revlog, [0, 1], 3))

164

[[0], [1]]

168

[[0], [1]]

165

>>> list(_slicechunktosize(revlog, [1, 3], 3))

169

>>> list(_slicechunktosize(revlog, [1, 3], 3))

166

[[1], [3]]

170

[[1], [3]]

167

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

171

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

168

[[1, 2], [3]]

172

[[1, 2], [3]]

169

>>> list(_slicechunktosize(revlog, [3, 5], 3))

173

>>> list(_slicechunktosize(revlog, [3, 5], 3))

170

[[3], [5]]

174

[[3], [5]]

171

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

175

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

172

[[3], [5]]

176

[[3], [5]]

173

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

177

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

174

[[5], [6, 7, 8]]

178

[[5], [6, 7, 8]]

175

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

179

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

176

[[0], [1, 2], [3], [5], [6, 7, 8]]

180

[[0], [1, 2], [3], [5], [6, 7, 8]]

177

181

178

Case with too large individual chunk (must return valid chunk)

182

Case with too large individual chunk (must return valid chunk)

179

>>> list(_slicechunktosize(revlog, [0, 1], 2))

183

>>> list(_slicechunktosize(revlog, [0, 1], 2))

180

[[0], [1]]

184

[[0], [1]]

181

>>> list(_slicechunktosize(revlog, [1, 3], 1))

185

>>> list(_slicechunktosize(revlog, [1, 3], 1))

182

[[1], [3]]

186

[[1], [3]]

183

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

187

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

184

[[3], [5]]

188

[[3], [5]]

185

189

186

== No Snapshot cases ==

190

== No Snapshot cases ==

187

>>> revlog = _testrevlog(data)

191

>>> revlog = _testrevlog(data)

188

192

189

Cases where chunk is already small enough

193

Cases where chunk is already small enough

190

>>> list(_slicechunktosize(revlog, [0], 3))

194

>>> list(_slicechunktosize(revlog, [0], 3))

191

[[0]]

195

[[0]]

192

>>> list(_slicechunktosize(revlog, [6, 7], 3))

196

>>> list(_slicechunktosize(revlog, [6, 7], 3))

193

[[6, 7]]

197

[[6, 7]]

194

>>> list(_slicechunktosize(revlog, [0], None))

198

>>> list(_slicechunktosize(revlog, [0], None))

195

[[0]]

199

[[0]]

196

>>> list(_slicechunktosize(revlog, [6, 7], None))

200

>>> list(_slicechunktosize(revlog, [6, 7], None))

197

[[6, 7]]

201

[[6, 7]]

198

202

199

cases where we need actual slicing

203

cases where we need actual slicing

200

>>> list(_slicechunktosize(revlog, [0, 1], 3))

204

>>> list(_slicechunktosize(revlog, [0, 1], 3))

201

[[0], [1]]

205

[[0], [1]]

202

>>> list(_slicechunktosize(revlog, [1, 3], 3))

206

>>> list(_slicechunktosize(revlog, [1, 3], 3))

203

[[1], [3]]

207

[[1], [3]]

204

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

208

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

205

[[1], [2, 3]]

209

[[1], [2, 3]]

206

>>> list(_slicechunktosize(revlog, [3, 5], 3))

210

>>> list(_slicechunktosize(revlog, [3, 5], 3))

207

[[3], [5]]

211

[[3], [5]]

208

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

212

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

209

[[3], [4, 5]]

213

[[3], [4, 5]]

210

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

214

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

211

[[5], [6, 7, 8]]

215

[[5], [6, 7, 8]]

212

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

216

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

213

[[0], [1, 2], [3], [5], [6, 7, 8]]

217

[[0], [1, 2], [3], [5], [6, 7, 8]]

214

218

215

Case with too large individual chunk (must return valid chunk)

219

Case with too large individual chunk (must return valid chunk)

216

>>> list(_slicechunktosize(revlog, [0, 1], 2))

220

>>> list(_slicechunktosize(revlog, [0, 1], 2))

217

[[0], [1]]

221

[[0], [1]]

218

>>> list(_slicechunktosize(revlog, [1, 3], 1))

222

>>> list(_slicechunktosize(revlog, [1, 3], 1))

219

[[1], [3]]

223

[[1], [3]]

220

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

224

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

221

[[3], [5]]

225

[[3], [5]]

222

226

223

== mixed case ==

227

== mixed case ==

224

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

228

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

225

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

229

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

226

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

230

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

227

"""

231

"""

228

assert targetsize is None or 0 <= targetsize

232

assert targetsize is None or 0 <= targetsize

229

startdata = revlog.start(revs[0])

233

startdata = revlog.start(revs[0])

230

enddata = revlog.end(revs[-1])

234

enddata = revlog.end(revs[-1])

231

fullspan = enddata - startdata

235

fullspan = enddata - startdata

232

if targetsize is None or fullspan <= targetsize:

236

if targetsize is None or fullspan <= targetsize:

233

yield revs

237

yield revs

234

return

238

return

235

239

236

startrevidx = 0

240

startrevidx = 0

237

endrevidx = 1

241

endrevidx = 1

238

iterrevs = enumerate(revs)

242

iterrevs = enumerate(revs)

239

next(iterrevs) # skip first rev.

243

next(iterrevs) # skip first rev.

240

# first step: get snapshots out of the way

244

# first step: get snapshots out of the way

241

for idx, r in iterrevs:

245

for idx, r in iterrevs:

242

span = revlog.end(r) - startdata

246

span = revlog.end(r) - startdata

243

snapshot = revlog.issnapshot(r)

247

snapshot = revlog.issnapshot(r)

244

if span <= targetsize and snapshot:

248

if span <= targetsize and snapshot:

245

endrevidx = idx + 1

249

endrevidx = idx + 1

246

else:

250

else:

247

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

251

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

248

if chunk:

252

if chunk:

249

yield chunk

253

yield chunk

250

startrevidx = idx

254

startrevidx = idx

251

startdata = revlog.start(r)

255

startdata = revlog.start(r)

252

endrevidx = idx + 1

256

endrevidx = idx + 1

253

if not snapshot:

257

if not snapshot:

254

break

258

break

255

259

256

# for the others, we use binary slicing to quickly converge toward valid

260

# for the others, we use binary slicing to quickly converge toward valid

257

# chunks (otherwise, we might end up looking for start/end of many

261

# chunks (otherwise, we might end up looking for start/end of many

258

# revisions). This logic is not looking for the perfect slicing point, it

262

# revisions). This logic is not looking for the perfect slicing point, it

259

# focuses on quickly converging toward valid chunks.

263

# focuses on quickly converging toward valid chunks.

260

nbitem = len(revs)

264

nbitem = len(revs)

261

while (enddata - startdata) > targetsize:

265

while (enddata - startdata) > targetsize:

262

endrevidx = nbitem

266

endrevidx = nbitem

263

if nbitem - startrevidx <= 1:

267

if nbitem - startrevidx <= 1:

264

break # protect against individual chunk larger than limit

268

break # protect against individual chunk larger than limit

265

localenddata = revlog.end(revs[endrevidx - 1])

269

localenddata = revlog.end(revs[endrevidx - 1])

266

span = localenddata - startdata

270

span = localenddata - startdata

267

while span > targetsize:

271

while span > targetsize:

268

if endrevidx - startrevidx <= 1:

272

if endrevidx - startrevidx <= 1:

269

break # protect against individual chunk larger than limit

273

break # protect against individual chunk larger than limit

270

endrevidx -= (endrevidx - startrevidx) // 2

274

endrevidx -= (endrevidx - startrevidx) // 2

271

localenddata = revlog.end(revs[endrevidx - 1])

275

localenddata = revlog.end(revs[endrevidx - 1])

272

span = localenddata - startdata

276

span = localenddata - startdata

273

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

277

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

274

if chunk:

278

if chunk:

275

yield chunk

279

yield chunk

276

startrevidx = endrevidx

280

startrevidx = endrevidx

277

startdata = revlog.start(revs[startrevidx])

281

startdata = revlog.start(revs[startrevidx])

278

282

279

chunk = _trimchunk(revlog, revs, startrevidx)

283

chunk = _trimchunk(revlog, revs, startrevidx)

280

if chunk:

284

if chunk:

281

yield chunk

285

yield chunk

282

286

283

def _slicechunktodensity(revlog, revs, targetdensity=0.5,

287

def _slicechunktodensity(revlog, revs, targetdensity=0.5,

284

mingapsize=0):

288

mingapsize=0):

285

"""slice revs to reduce the amount of unrelated data to be read from disk.

289

"""slice revs to reduce the amount of unrelated data to be read from disk.

286

290

287

``revs`` is sliced into groups that should be read in one time.

291

``revs`` is sliced into groups that should be read in one time.

288

Assume that revs are sorted.

292

Assume that revs are sorted.

289

293

290

The initial chunk is sliced until the overall density (payload/chunks-span

294

The initial chunk is sliced until the overall density (payload/chunks-span

291

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

295

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

292

skipped.

296

skipped.

293

297

294

>>> revlog = _testrevlog([

298

>>> revlog = _testrevlog([

295

... 5, #00 (5)

299

... 5, #00 (5)

296

... 10, #01 (5)

300

... 10, #01 (5)

297

... 12, #02 (2)

301

... 12, #02 (2)

298

... 12, #03 (empty)

302

... 12, #03 (empty)

299

... 27, #04 (15)

303

... 27, #04 (15)

300

... 31, #05 (4)

304

... 31, #05 (4)

301

... 31, #06 (empty)

305

... 31, #06 (empty)

302

... 42, #07 (11)

306

... 42, #07 (11)

303

... 47, #08 (5)

307

... 47, #08 (5)

304

... 47, #09 (empty)

308

... 47, #09 (empty)

305

... 48, #10 (1)

309

... 48, #10 (1)

306

... 51, #11 (3)

310

... 51, #11 (3)

307

... 74, #12 (23)

311

... 74, #12 (23)

308

... 85, #13 (11)

312

... 85, #13 (11)

309

... 86, #14 (1)

313

... 86, #14 (1)

310

... 91, #15 (5)

314

... 91, #15 (5)

311

... ])

315

... ])

312

316

313

>>> list(_slicechunktodensity(revlog, list(range(16))))

317

>>> list(_slicechunktodensity(revlog, list(range(16))))

314

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

318

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

315

>>> list(_slicechunktodensity(revlog, [0, 15]))

319

>>> list(_slicechunktodensity(revlog, [0, 15]))

316

[[0], [15]]

320

[[0], [15]]

317

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

321

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

318

[[0], [11], [15]]

322

[[0], [11], [15]]

319

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

323

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

320

[[0], [11, 13, 15]]

324

[[0], [11, 13, 15]]

321

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

325

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

322

[[1, 2], [5, 8, 10, 11], [14]]

326

[[1, 2], [5, 8, 10, 11], [14]]

323

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

327

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

324

... mingapsize=20))

328

... mingapsize=20))

325

[[1, 2, 3, 5, 8, 10, 11], [14]]

329

[[1, 2, 3, 5, 8, 10, 11], [14]]

326

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

330

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

327

... targetdensity=0.95))

331

... targetdensity=0.95))

328

[[1, 2], [5], [8, 10, 11], [14]]

332

[[1, 2], [5], [8, 10, 11], [14]]

329

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

333

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

330

... targetdensity=0.95, mingapsize=12))

334

... targetdensity=0.95, mingapsize=12))

331

[[1, 2], [5, 8, 10, 11], [14]]

335

[[1, 2], [5, 8, 10, 11], [14]]

332

"""

336

"""

333

start = revlog.start

337

start = revlog.start

334

length = revlog.length

338

length = revlog.length

335

339

336

if len(revs) <= 1:

340

if len(revs) <= 1:

337

yield revs

341

yield revs

338

return

342

return

339

343

340

deltachainspan = segmentspan(revlog, revs)

344

deltachainspan = segmentspan(revlog, revs)

341

345

342

if deltachainspan < mingapsize:

346

if deltachainspan < mingapsize:

343

yield revs

347

yield revs

344

return

348

return

345

349

346

readdata = deltachainspan

350

readdata = deltachainspan

347

chainpayload = sum(length(r) for r in revs)

351

chainpayload = sum(length(r) for r in revs)

348

352

349

if deltachainspan:

353

if deltachainspan:

350

density = chainpayload / float(deltachainspan)

354

density = chainpayload / float(deltachainspan)

351

else:

355

else:

352

density = 1.0

356

density = 1.0

353

357

354

if density >= targetdensity:

358

if density >= targetdensity:

355

yield revs

359

yield revs

356

return

360

return

357

361

358

# Store the gaps in a heap to have them sorted by decreasing size

362

# Store the gaps in a heap to have them sorted by decreasing size

359

gaps = []

363

gaps = []

360

prevend = None

364

prevend = None

361

for i, rev in enumerate(revs):

365

for i, rev in enumerate(revs):

362

revstart = start(rev)

366

revstart = start(rev)

363

revlen = length(rev)

367

revlen = length(rev)

364

368

365

# Skip empty revisions to form larger holes

369

# Skip empty revisions to form larger holes

366

if revlen == 0:

370

if revlen == 0:

367

continue

371

continue

368

372

369

if prevend is not None:

373

if prevend is not None:

370

gapsize = revstart - prevend

374

gapsize = revstart - prevend

371

# only consider holes that are large enough

375

# only consider holes that are large enough

372

if gapsize > mingapsize:

376

if gapsize > mingapsize:

373

gaps.append((gapsize, i))

377

gaps.append((gapsize, i))

374

378

375

prevend = revstart + revlen

379

prevend = revstart + revlen

376

# sort the gaps to pop them from largest to small

380

# sort the gaps to pop them from largest to small

377

gaps.sort()

381

gaps.sort()

378

382

379

# Collect the indices of the largest holes until the density is acceptable

383

# Collect the indices of the largest holes until the density is acceptable

380

selected = []

384

selected = []

381

while gaps and density < targetdensity:

385

while gaps and density < targetdensity:

382

gapsize, gapidx = gaps.pop()

386

gapsize, gapidx = gaps.pop()

383

387

384

selected.append(gapidx)

388

selected.append(gapidx)

385

389

386

# the gap sizes are stored as negatives to be sorted decreasingly

390

# the gap sizes are stored as negatives to be sorted decreasingly

387

# by the heap

391

# by the heap

388

readdata -= gapsize

392

readdata -= gapsize

389

if readdata > 0:

393

if readdata > 0:

390

density = chainpayload / float(readdata)

394

density = chainpayload / float(readdata)

391

else:

395

else:

392

density = 1.0

396

density = 1.0

393

selected.sort()

397

selected.sort()

394

398

395

# Cut the revs at collected indices

399

# Cut the revs at collected indices

396

previdx = 0

400

previdx = 0

397

for idx in selected:

401

for idx in selected:

398

402

399

chunk = _trimchunk(revlog, revs, previdx, idx)

403

chunk = _trimchunk(revlog, revs, previdx, idx)

400

if chunk:

404

if chunk:

401

yield chunk

405

yield chunk

402

406

403

previdx = idx

407

previdx = idx

404

408

405

chunk = _trimchunk(revlog, revs, previdx)

409

chunk = _trimchunk(revlog, revs, previdx)

406

if chunk:

410

if chunk:

407

yield chunk

411

yield chunk

408

412

409

def _trimchunk(revlog, revs, startidx, endidx=None):

413

def _trimchunk(revlog, revs, startidx, endidx=None):

410

"""returns revs[startidx:endidx] without empty trailing revs

414

"""returns revs[startidx:endidx] without empty trailing revs

411

415

412

Doctest Setup

416

Doctest Setup

413

>>> revlog = _testrevlog([

417

>>> revlog = _testrevlog([

414

... 5, #0

418

... 5, #0

415

... 10, #1

419

... 10, #1

416

... 12, #2

420

... 12, #2

417

... 12, #3 (empty)

421

... 12, #3 (empty)

418

... 17, #4

422

... 17, #4

419

... 21, #5

423

... 21, #5

420

... 21, #6 (empty)

424

... 21, #6 (empty)

421

... ])

425

... ])

422

426

423

Contiguous cases:

427

Contiguous cases:

424

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

428

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

425

[0, 1, 2, 3, 4, 5]

429

[0, 1, 2, 3, 4, 5]

426

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

430

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

427

[0, 1, 2, 3, 4]

431

[0, 1, 2, 3, 4]

428

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

432

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

429

[0, 1, 2]

433

[0, 1, 2]

430

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

434

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

431

[2]

435

[2]

432

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

436

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

433

[3, 4, 5]

437

[3, 4, 5]

434

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

438

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

435

[3, 4]

439

[3, 4]

436

440

437

Discontiguous cases:

441

Discontiguous cases:

438

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

442

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

439

[1, 3, 5]

443

[1, 3, 5]

440

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

444

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

441

[1]

445

[1]

442

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

446

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

443

[3, 5]

447

[3, 5]

444

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

448

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

445

[3, 5]

449

[3, 5]

446

"""

450

"""

447

length = revlog.length

451

length = revlog.length

448

452

449

if endidx is None:

453

if endidx is None:

450

endidx = len(revs)

454

endidx = len(revs)

451

455

452

# If we have a non-emtpy delta candidate, there are nothing to trim

456

# If we have a non-emtpy delta candidate, there are nothing to trim

453

if revs[endidx - 1] < len(revlog):

457

if revs[endidx - 1] < len(revlog):

454

# Trim empty revs at the end, except the very first revision of a chain

458

# Trim empty revs at the end, except the very first revision of a chain

455

while (endidx > 1

459

while (endidx > 1

456

and endidx > startidx

460

and endidx > startidx

457

and length(revs[endidx - 1]) == 0):

461

and length(revs[endidx - 1]) == 0):

458

endidx -= 1

462

endidx -= 1

459

463

460

return revs[startidx:endidx]

464

return revs[startidx:endidx]

461

465

462

def segmentspan(revlog, revs):

466

def segmentspan(revlog, revs):

463

"""Get the byte span of a segment of revisions

467

"""Get the byte span of a segment of revisions

464

468

465

revs is a sorted array of revision numbers

469

revs is a sorted array of revision numbers

466

470

467

>>> revlog = _testrevlog([

471

>>> revlog = _testrevlog([

468

... 5, #0

472

... 5, #0

469

... 10, #1

473

... 10, #1

470

... 12, #2

474

... 12, #2

471

... 12, #3 (empty)

475

... 12, #3 (empty)

472

... 17, #4

476

... 17, #4

473

... ])

477

... ])

474

478

475

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

479

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

476

17

480

17

477

>>> segmentspan(revlog, [0, 4])

481

>>> segmentspan(revlog, [0, 4])

478

17

482

17

479

>>> segmentspan(revlog, [3, 4])

483

>>> segmentspan(revlog, [3, 4])

480

5

484

5

481

>>> segmentspan(revlog, [1, 2, 3,])

485

>>> segmentspan(revlog, [1, 2, 3,])

482

7

486

7

483

>>> segmentspan(revlog, [1, 3])

487

>>> segmentspan(revlog, [1, 3])

484

7

488

7

485

"""

489

"""

486

if not revs:

490

if not revs:

487

return 0

491

return 0

488

end = revlog.end(revs[-1])

492

end = revlog.end(revs[-1])

489

return end - revlog.start(revs[0])

493

return end - revlog.start(revs[0])

490

494

491

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

495

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

492

"""build full text from a (base, delta) pair and other metadata"""

496

"""build full text from a (base, delta) pair and other metadata"""

493

# special case deltas which replace entire base; no need to decode

497

# special case deltas which replace entire base; no need to decode

494

# base revision. this neatly avoids censored bases, which throw when

498

# base revision. this neatly avoids censored bases, which throw when

495

# they're decoded.

499

# they're decoded.

496

hlen = struct.calcsize(">lll")

500

hlen = struct.calcsize(">lll")

497

if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),

501

if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),

498

len(delta) - hlen):

502

len(delta) - hlen):

499

fulltext = delta[hlen:]

503

fulltext = delta[hlen:]

500

else:

504

else:

501

# deltabase is rawtext before changed by flag processors, which is

505

# deltabase is rawtext before changed by flag processors, which is

502

# equivalent to non-raw text

506

# equivalent to non-raw text

503

basetext = revlog.revision(baserev, _df=fh, raw=False)

507

basetext = revlog.revision(baserev, _df=fh, raw=False)

504

fulltext = mdiff.patch(basetext, delta)

508

fulltext = mdiff.patch(basetext, delta)

505

509

506

try:

510

try:

507

res = revlog._processflags(fulltext, flags, 'read', raw=True)

511

res = revlog._processflags(fulltext, flags, 'read', raw=True)

508

fulltext, validatehash = res

512

fulltext, validatehash = res

509

if validatehash:

513

if validatehash:

510

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

514

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

511

if flags & REVIDX_ISCENSORED:

515

if flags & REVIDX_ISCENSORED:

512

raise error.StorageError(_('node %s is not censored') %

516

raise error.StorageError(_('node %s is not censored') %

513

expectednode)

517

expectednode)

514

except error.CensoredNodeError:

518

except error.CensoredNodeError:

515

# must pass the censored index flag to add censored revisions

519

# must pass the censored index flag to add censored revisions

516

if not flags & REVIDX_ISCENSORED:

520

if not flags & REVIDX_ISCENSORED:

517

raise

521

raise

518

return fulltext

522

return fulltext

519

523

520

@attr.s(slots=True, frozen=True)

524

@attr.s(slots=True, frozen=True)

521

class _deltainfo(object):

525

class _deltainfo(object):

522

distance = attr.ib()

526

distance = attr.ib()

523

deltalen = attr.ib()

527

deltalen = attr.ib()

524

data = attr.ib()

528

data = attr.ib()

525

base = attr.ib()

529

base = attr.ib()

526

chainbase = attr.ib()

530

chainbase = attr.ib()

527

chainlen = attr.ib()

531

chainlen = attr.ib()

528

compresseddeltalen = attr.ib()

532

compresseddeltalen = attr.ib()

529

snapshotdepth = attr.ib()

533

snapshotdepth = attr.ib()

530

534

531

def isgooddeltainfo(revlog, deltainfo, revinfo):

535

def isgooddeltainfo(revlog, deltainfo, revinfo):

532

"""Returns True if the given delta is good. Good means that it is within

536

"""Returns True if the given delta is good. Good means that it is within

533

the disk span, disk size, and chain length bounds that we know to be

537

the disk span, disk size, and chain length bounds that we know to be

534

performant."""

538

performant."""

535

if deltainfo is None:

539

if deltainfo is None:

536

return False

540

return False

537

541

538

# - 'deltainfo.distance' is the distance from the base revision --

542

# - 'deltainfo.distance' is the distance from the base revision --

539

# bounding it limits the amount of I/O we need to do.

543

# bounding it limits the amount of I/O we need to do.

540

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

544

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

541

# deltas we need to apply -- bounding it limits the amount of CPU

545

# deltas we need to apply -- bounding it limits the amount of CPU

542

# we consume.

546

# we consume.

543

547

544

textlen = revinfo.textlen

548

textlen = revinfo.textlen

545

defaultmax = textlen * 4

549

defaultmax = textlen * 4

546

maxdist = revlog._maxdeltachainspan

550

maxdist = revlog._maxdeltachainspan

547

if not maxdist:

551

if not maxdist:

548

maxdist = deltainfo.distance # ensure the conditional pass

552

maxdist = deltainfo.distance # ensure the conditional pass

549

maxdist = max(maxdist, defaultmax)

553

maxdist = max(maxdist, defaultmax)

550

554

551

# Bad delta from read span:

555

# Bad delta from read span:

552

#

556

#

553

# If the span of data read is larger than the maximum allowed.

557

# If the span of data read is larger than the maximum allowed.

554

#

558

#

555

# In the sparse-revlog case, we rely on the associated "sparse reading"

559

# In the sparse-revlog case, we rely on the associated "sparse reading"

556

# to avoid issue related to the span of data. In theory, it would be

560

# to avoid issue related to the span of data. In theory, it would be

557

# possible to build pathological revlog where delta pattern would lead

561

# possible to build pathological revlog where delta pattern would lead

558

# to too many reads. However, they do not happen in practice at all. So

562

# to too many reads. However, they do not happen in practice at all. So

559

# we skip the span check entirely.

563

# we skip the span check entirely.

560

if not revlog._sparserevlog and maxdist < deltainfo.distance:

564

if not revlog._sparserevlog and maxdist < deltainfo.distance:

561

return False

565

return False

562

566

563

# Bad delta from new delta size:

567

# Bad delta from new delta size:

564

#

568

#

565

# If the delta size is larger than the target text, storing the

569

# If the delta size is larger than the target text, storing the

566

# delta will be inefficient.

570

# delta will be inefficient.

567

if textlen < deltainfo.deltalen:

571

if textlen < deltainfo.deltalen:

568

return False

572

return False

569

573

570

# Bad delta from cumulated payload size:

574

# Bad delta from cumulated payload size:

571

#

575

#

572

# If the sum of delta get larger than K * target text length.

576

# If the sum of delta get larger than K * target text length.

573

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

577

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

574

return False

578

return False

575

579

576

# Bad delta from chain length:

580

# Bad delta from chain length:

577

#

581

#

578

# If the number of delta in the chain gets too high.

582

# If the number of delta in the chain gets too high.

579

if (revlog._maxchainlen

583

if (revlog._maxchainlen

580

and revlog._maxchainlen < deltainfo.chainlen):

584

and revlog._maxchainlen < deltainfo.chainlen):

581

return False

585

return False

582

586

583

# bad delta from intermediate snapshot size limit

587

# bad delta from intermediate snapshot size limit

584

#

588

#

585

# If an intermediate snapshot size is higher than the limit. The

589

# If an intermediate snapshot size is higher than the limit. The

586

# limit exist to prevent endless chain of intermediate delta to be

590

# limit exist to prevent endless chain of intermediate delta to be

587

# created.

591

# created.

588

if (deltainfo.snapshotdepth is not None and

592

if (deltainfo.snapshotdepth is not None and

589

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

593

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

590

return False

594

return False

591

595

592

# bad delta if new intermediate snapshot is larger than the previous

596

# bad delta if new intermediate snapshot is larger than the previous

593

# snapshot

597

# snapshot

594

if (deltainfo.snapshotdepth

598

if (deltainfo.snapshotdepth

595

and revlog.length(deltainfo.base) < deltainfo.deltalen):

599

and revlog.length(deltainfo.base) < deltainfo.deltalen):

596

return False

600

return False

597

601

598

return True

602

return True

599

603

600

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

604

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

601

"""Provides group of revision to be tested as delta base

605

"""Provides group of revision to be tested as delta base

602

606

603

This top level function focus on emitting groups with unique and worthwhile

607

This top level function focus on emitting groups with unique and worthwhile

604

content. See _raw_candidate_groups for details about the group order.

608

content. See _raw_candidate_groups for details about the group order.

605

"""

609

"""

606

# should we try to build a delta?

610

# should we try to build a delta?

607

if not (len(revlog) and revlog._storedeltachains):

611

if not (len(revlog) and revlog._storedeltachains):

608

yield None

612

yield None

609

return

613

return

610

614

611

deltalength = revlog.length

615

deltalength = revlog.length

612

deltaparent = revlog.deltaparent

616

deltaparent = revlog.deltaparent

613

good = None

617

good = None

614

618

615

deltas_limit = textlen * LIMIT_DELTA2TEXT

619

deltas_limit = textlen * LIMIT_DELTA2TEXT

616

620

617

tested = set([nullrev])

621

tested = set([nullrev])

618

candidates = _refinedgroups(revlog, p1, p2, cachedelta)

622

candidates = _refinedgroups(revlog, p1, p2, cachedelta)

619

while True:

623

while True:

620

temptative = candidates.send(good)

624

temptative = candidates.send(good)

621

if temptative is None:

625

if temptative is None:

622

break

626

break

623

group = []

627

group = []

624

for rev in temptative:

628

for rev in temptative:

625

# skip over empty delta (no need to include them in a chain)

629

# skip over empty delta (no need to include them in a chain)

626

while (revlog._generaldelta

630

while (revlog._generaldelta

627

and not (rev == nullrev

631

and not (rev == nullrev

628

or rev in tested

632

or rev in tested

629

or deltalength(rev))):

633

or deltalength(rev))):

630

tested.add(rev)

634

tested.add(rev)

631

rev = deltaparent(rev)

635

rev = deltaparent(rev)

632

# filter out revision we tested already

636

# filter out revision we tested already

633

if rev in tested:

637

if rev in tested:

634

continue

638

continue

635

tested.add(rev)

639

tested.add(rev)

636

# filter out delta base that will never produce good delta

640

# filter out delta base that will never produce good delta

637

if deltas_limit < revlog.length(rev):

641

if deltas_limit < revlog.length(rev):

638

continue

642

continue

639

# no need to try a delta against nullrev, this will be done as a

643

# no need to try a delta against nullrev, this will be done as a

640

# last resort.

644

# last resort.

641

if rev == nullrev:

645

if rev == nullrev:

642

continue

646

continue

643

# no delta for rawtext-changing revs (see "candelta" for why)

647

# no delta for rawtext-changing revs (see "candelta" for why)

644

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

648

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

645

continue

649

continue

646

group.append(rev)

650

group.append(rev)

647

if group:

651

if group:

648

# XXX: in the sparse revlog case, group can become large,

652

# XXX: in the sparse revlog case, group can become large,

649

# impacting performances. Some bounding or slicing mecanism

653

# impacting performances. Some bounding or slicing mecanism

650

# would help to reduce this impact.

654

# would help to reduce this impact.

651

good = yield tuple(group)

655

good = yield tuple(group)

652

yield None

656

yield None

653

657

654

def _findsnapshots(revlog, cache, start_rev):

658

def _findsnapshots(revlog, cache, start_rev):

655

"""find snapshot from start_rev to tip"""

659

"""find snapshot from start_rev to tip"""

656

deltaparent = revlog.deltaparent

660

deltaparent = revlog.deltaparent

657

issnapshot = revlog.issnapshot

661

issnapshot = revlog.issnapshot

658

for rev in revlog.revs(start_rev):

662

for rev in revlog.revs(start_rev):

659

if issnapshot(rev):

663

if issnapshot(rev):

660

cache[deltaparent(rev)].append(rev)

664

cache[deltaparent(rev)].append(rev)

661

665

662

def _refinedgroups(revlog, p1, p2, cachedelta):

666

def _refinedgroups(revlog, p1, p2, cachedelta):

663

good = None

667

good = None

664

# First we try to reuse a the delta contained in the bundle.

668

# First we try to reuse a the delta contained in the bundle.

665

# (or from the source revlog)

669

# (or from the source revlog)

666

#

670

#

667

# This logic only applies to general delta repositories and can be disabled

671

# This logic only applies to general delta repositories and can be disabled

668

# through configuration. Disabling reuse source delta is useful when

672

# through configuration. Disabling reuse source delta is useful when

669

# we want to make sure we recomputed "optimal" deltas.

673

# we want to make sure we recomputed "optimal" deltas.

670

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

674

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

671

# Assume what we received from the server is a good choice

675

# Assume what we received from the server is a good choice

672

# build delta will reuse the cache

676

# build delta will reuse the cache

673

good = yield (cachedelta[0],)

677

good = yield (cachedelta[0],)

674

if good is not None:

678

if good is not None:

675

yield None

679

yield None

676

return

680

return

677

for candidates in _rawgroups(revlog, p1, p2, cachedelta):

681

for candidates in _rawgroups(revlog, p1, p2, cachedelta):

678

good = yield candidates

682

good = yield candidates

679

if good is not None:

683

if good is not None:

680

break

684

break

681

685

682

# If sparse revlog is enabled, we can try to refine the available deltas

686

# If sparse revlog is enabled, we can try to refine the available deltas

683

if not revlog._sparserevlog:

687

if not revlog._sparserevlog:

684

yield None

688

yield None

685

return

689

return

686

690

687

# if we have a refinable value, try to refine it

691

# if we have a refinable value, try to refine it

688

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

692

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

689

# refine snapshot down

693

# refine snapshot down

690

previous = None

694

previous = None

691

while previous != good:

695

while previous != good:

692

previous = good

696

previous = good

693

base = revlog.deltaparent(good)

697

base = revlog.deltaparent(good)

694

if base == nullrev:

698

if base == nullrev:

695

break

699

break

696

good = yield (base,)

700

good = yield (base,)

697

# refine snapshot up

701

# refine snapshot up

698

#

702

#

699

# XXX the _findsnapshots call can be expensive and is "duplicated" with

703

# XXX the _findsnapshots call can be expensive and is "duplicated" with

700

# the one done in `_rawgroups`. Once we start working on performance,

704

# the one done in `_rawgroups`. Once we start working on performance,

701

# we should make the two logics share this computation.

705

# we should make the two logics share this computation.

702

snapshots = collections.defaultdict(list)

706

snapshots = collections.defaultdict(list)

703

_findsnapshots(revlog, snapshots, good + 1)

707

_findsnapshots(revlog, snapshots, good + 1)

704

previous = None

708

previous = None

705

while good != previous:

709

while good != previous:

706

previous = good

710

previous = good

707

children = tuple(sorted(c for c in snapshots[good]))

711

children = tuple(sorted(c for c in snapshots[good]))

708

good = yield children

712

good = yield children

709

713

710

# we have found nothing

714

# we have found nothing

711

yield None

715

yield None

712

716

713

def _rawgroups(revlog, p1, p2, cachedelta):

717

def _rawgroups(revlog, p1, p2, cachedelta):

714

"""Provides group of revision to be tested as delta base

718

"""Provides group of revision to be tested as delta base

715

719

716

This lower level function focus on emitting delta theorically interresting

720

This lower level function focus on emitting delta theorically interresting

717

without looking it any practical details.

721

without looking it any practical details.

718

722

719

The group order aims at providing fast or small candidates first.

723

The group order aims at providing fast or small candidates first.

720

"""

724

"""

721

gdelta = revlog._generaldelta

725

gdelta = revlog._generaldelta

722

sparse = revlog._sparserevlog

726

sparse = revlog._sparserevlog

723

curr = len(revlog)

727

curr = len(revlog)

724

prev = curr - 1

728

prev = curr - 1

725

deltachain = lambda rev: revlog._deltachain(rev)[0]

729

deltachain = lambda rev: revlog._deltachain(rev)[0]

726

730

727

if gdelta:

731

if gdelta:

728

# exclude already lazy tested base if any

732

# exclude already lazy tested base if any

729

parents = [p for p in (p1, p2) if p != nullrev]

733

parents = [p for p in (p1, p2) if p != nullrev]

730

734

731

if not revlog._deltabothparents and len(parents) == 2:

735

if not revlog._deltabothparents and len(parents) == 2:

732

parents.sort()

736

parents.sort()

733

# To minimize the chance of having to build a fulltext,

737

# To minimize the chance of having to build a fulltext,

734

# pick first whichever parent is closest to us (max rev)

738

# pick first whichever parent is closest to us (max rev)

735

yield (parents[1],)

739

yield (parents[1],)

736

# then the other one (min rev) if the first did not fit

740

# then the other one (min rev) if the first did not fit

737

yield (parents[0],)

741

yield (parents[0],)

738

elif len(parents) > 0:

742

elif len(parents) > 0:

739

# Test all parents (1 or 2), and keep the best candidate

743

# Test all parents (1 or 2), and keep the best candidate

740

yield parents

744

yield parents

741

745

742

if sparse and parents:

746

if sparse and parents:

743

snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev

747

snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev

744

# See if we can use an existing snapshot in the parent chains to use as

748

# See if we can use an existing snapshot in the parent chains to use as

745

# a base for a new intermediate-snapshot

749

# a base for a new intermediate-snapshot

746

#

750

#

747

# search for snapshot in parents delta chain

751

# search for snapshot in parents delta chain

748

# map: snapshot-level: snapshot-rev

752

# map: snapshot-level: snapshot-rev

749

parents_snaps = collections.defaultdict(set)

753

parents_snaps = collections.defaultdict(set)

750

candidate_chains = [deltachain(p) for p in parents]

754

candidate_chains = [deltachain(p) for p in parents]

751

for chain in candidate_chains:

755

for chain in candidate_chains:

752

for idx, s in enumerate(chain):

756

for idx, s in enumerate(chain):

753

if not revlog.issnapshot(s):

757

if not revlog.issnapshot(s):

754

break

758

break

755

parents_snaps[idx].add(s)

759

parents_snaps[idx].add(s)

756

snapfloor = min(parents_snaps[0]) + 1

760

snapfloor = min(parents_snaps[0]) + 1

757

_findsnapshots(revlog, snapshots, snapfloor)

761

_findsnapshots(revlog, snapshots, snapfloor)

758

# search for the highest "unrelated" revision

762

# search for the highest "unrelated" revision

759

#

763

#

760

# Adding snapshots used by "unrelated" revision increase the odd we

764

# Adding snapshots used by "unrelated" revision increase the odd we

761

# reuse an independant, yet better snapshot chain.

765

# reuse an independant, yet better snapshot chain.

762

#

766

#

763

# XXX instead of building a set of revisions, we could lazily enumerate

767

# XXX instead of building a set of revisions, we could lazily enumerate

764

# over the chains. That would be more efficient, however we stick to

768

# over the chains. That would be more efficient, however we stick to

765

# simple code for now.

769

# simple code for now.

766

all_revs = set()

770

all_revs = set()

767

for chain in candidate_chains:

771

for chain in candidate_chains:

768

all_revs.update(chain)

772

all_revs.update(chain)

769

other = None

773

other = None

770

for r in revlog.revs(prev, snapfloor):

774

for r in revlog.revs(prev, snapfloor):

771

if r not in all_revs:

775

if r not in all_revs:

772

other = r

776

other = r

773

break

777

break

774

if other is not None:

778

if other is not None:

775

# To avoid unfair competition, we won't use unrelated intermediate

779

# To avoid unfair competition, we won't use unrelated intermediate

776

# snapshot that are deeper than the ones from the parent delta

780

# snapshot that are deeper than the ones from the parent delta

777

# chain.

781

# chain.

778

max_depth = max(parents_snaps.keys())

782

max_depth = max(parents_snaps.keys())

779

chain = deltachain(other)

783

chain = deltachain(other)

780

for idx, s in enumerate(chain):

784

for idx, s in enumerate(chain):

781

if s < snapfloor:

785

if s < snapfloor:

782

continue

786

continue

783

if max_depth < idx:

787

if max_depth < idx:

784

break

788

break

785

if not revlog.issnapshot(s):

789

if not revlog.issnapshot(s):

786

break

790

break

787

parents_snaps[idx].add(s)

791

parents_snaps[idx].add(s)

788

# Test them as possible intermediate snapshot base

792

# Test them as possible intermediate snapshot base

789

# We test them from highest to lowest level. High level one are more

793

# We test them from highest to lowest level. High level one are more

790

# likely to result in small delta

794

# likely to result in small delta

791

floor = None

795

floor = None

792

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

796

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

793

siblings = set()

797

siblings = set()

794

for s in snaps:

798

for s in snaps:

795

siblings.update(snapshots[s])

799

siblings.update(snapshots[s])

796

# Before considering making a new intermediate snapshot, we check

800

# Before considering making a new intermediate snapshot, we check

797

# if an existing snapshot, children of base we consider, would be

801

# if an existing snapshot, children of base we consider, would be

798

# suitable.

802

# suitable.

799

#

803

#

800

# It give a change to reuse a delta chain "unrelated" to the

804

# It give a change to reuse a delta chain "unrelated" to the

801

# current revision instead of starting our own. Without such

805

# current revision instead of starting our own. Without such

802

# re-use, topological branches would keep reopening new chains.

806

# re-use, topological branches would keep reopening new chains.

803

# Creating more and more snapshot as the repository grow.

807

# Creating more and more snapshot as the repository grow.

804

808

805

if floor is not None:

809

if floor is not None:

806

# We only do this for siblings created after the one in our

810

# We only do this for siblings created after the one in our

807

# parent's delta chain. Those created before has less chances

811

# parent's delta chain. Those created before has less chances

808

# to be valid base since our ancestors had to create a new

812

# to be valid base since our ancestors had to create a new

809

# snapshot.

813

# snapshot.

810

siblings = [r for r in siblings if floor < r]

814

siblings = [r for r in siblings if floor < r]

811

yield tuple(sorted(siblings))

815

yield tuple(sorted(siblings))

812

# then test the base from our parent's delta chain.

816

# then test the base from our parent's delta chain.

813

yield tuple(sorted(snaps))

817

yield tuple(sorted(snaps))

814

floor = min(snaps)

818

floor = min(snaps)

815

# No suitable base found in the parent chain, search if any full

819

# No suitable base found in the parent chain, search if any full

816

# snapshots emitted since parent's base would be a suitable base for an

820

# snapshots emitted since parent's base would be a suitable base for an

817

# intermediate snapshot.

821

# intermediate snapshot.

818

#

822

#

819

# It give a chance to reuse a delta chain unrelated to the current

823

# It give a chance to reuse a delta chain unrelated to the current

820

# revisions instead of starting our own. Without such re-use,

824

# revisions instead of starting our own. Without such re-use,

821

# topological branches would keep reopening new full chains. Creating

825

# topological branches would keep reopening new full chains. Creating

822

# more and more snapshot as the repository grow.

826

# more and more snapshot as the repository grow.

823

yield tuple(snapshots[nullrev])

827

yield tuple(snapshots[nullrev])

824

828

825

if not sparse:

829

if not sparse:

826

# other approach failed try against prev to hopefully save us a

830

# other approach failed try against prev to hopefully save us a

827

# fulltext.

831

# fulltext.

828

yield (prev,)

832

yield (prev,)

829

833

830

class deltacomputer(object):

834

class deltacomputer(object):

831

def __init__(self, revlog):

835

def __init__(self, revlog):

832

self.revlog = revlog

836

self.revlog = revlog

833

837

834

def buildtext(self, revinfo, fh):

838

def buildtext(self, revinfo, fh):

835

"""Builds a fulltext version of a revision

839

"""Builds a fulltext version of a revision

836

840

837

revinfo: _revisioninfo instance that contains all needed info

841

revinfo: _revisioninfo instance that contains all needed info

838

fh: file handle to either the .i or the .d revlog file,

842

fh: file handle to either the .i or the .d revlog file,

839

depending on whether it is inlined or not

843

depending on whether it is inlined or not

840

"""

844

"""

841

btext = revinfo.btext

845

btext = revinfo.btext

842

if btext[0] is not None:

846

if btext[0] is not None:

843

return btext[0]

847

return btext[0]

844

848

845

revlog = self.revlog

849

revlog = self.revlog

846

cachedelta = revinfo.cachedelta

850

cachedelta = revinfo.cachedelta

847

baserev = cachedelta[0]

851

baserev = cachedelta[0]

848

delta = cachedelta[1]

852

delta = cachedelta[1]

849

853

850

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

854

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

851

revinfo.p1, revinfo.p2,

855

revinfo.p1, revinfo.p2,

852

revinfo.flags, revinfo.node)

856

revinfo.flags, revinfo.node)

853

return fulltext

857

return fulltext

854

858

855

def _builddeltadiff(self, base, revinfo, fh):

859

def _builddeltadiff(self, base, revinfo, fh):

856

revlog = self.revlog

860

revlog = self.revlog

857

t = self.buildtext(revinfo, fh)

861

t = self.buildtext(revinfo, fh)

858

if revlog.iscensored(base):

862

if revlog.iscensored(base):

859

# deltas based on a censored revision must replace the

863

# deltas based on a censored revision must replace the

860

# full content in one patch, so delta works everywhere

864

# full content in one patch, so delta works everywhere

861

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

865

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

862

delta = header + t

866

delta = header + t

863

else:

867

else:

864

ptext = revlog.revision(base, _df=fh, raw=True)

868

ptext = revlog.revision(base, _df=fh, raw=True)

865

delta = mdiff.textdiff(ptext, t)

869

delta = mdiff.textdiff(ptext, t)

866

870

867

return delta

871

return delta

868

872

869

def _builddeltainfo(self, revinfo, base, fh):

873

def _builddeltainfo(self, revinfo, base, fh):

870

# can we use the cached delta?

874

# can we use the cached delta?

871

delta = None

875

delta = None

872

if revinfo.cachedelta:

876

if revinfo.cachedelta:

873

cachebase, cachediff = revinfo.cachedelta

877

cachebase, cachediff = revinfo.cachedelta

874

#check if the diff still apply

878

#check if the diff still apply

875

currentbase = cachebase

879

currentbase = cachebase

876

while (currentbase != nullrev

880

while (currentbase != nullrev

877

and currentbase != base

881

and currentbase != base

878

and self.revlog.length(currentbase) == 0):

882

and self.revlog.length(currentbase) == 0):

879

currentbase = self.revlog.deltaparent(currentbase)

883

currentbase = self.revlog.deltaparent(currentbase)

880

if currentbase == base:

884

if currentbase == base:

881

delta = revinfo.cachedelta[1]

885

delta = revinfo.cachedelta[1]

882

if delta is None:

886

if delta is None:

883

delta = self._builddeltadiff(base, revinfo, fh)

887

delta = self._builddeltadiff(base, revinfo, fh)

884

revlog = self.revlog

888

revlog = self.revlog

885

header, data = revlog.compress(delta)

889

header, data = revlog.compress(delta)

886

deltalen = len(header) + len(data)

890

deltalen = len(header) + len(data)

887

chainbase = revlog.chainbase(base)

891

chainbase = revlog.chainbase(base)

888

offset = revlog.end(len(revlog) - 1)

892

offset = revlog.end(len(revlog) - 1)

889

dist = deltalen + offset - revlog.start(chainbase)

893

dist = deltalen + offset - revlog.start(chainbase)

890

if revlog._generaldelta:

894

if revlog._generaldelta:

891

deltabase = base

895

deltabase = base

892

else:

896

else:

893

deltabase = chainbase

897

deltabase = chainbase

894

chainlen, compresseddeltalen = revlog._chaininfo(base)

898

chainlen, compresseddeltalen = revlog._chaininfo(base)

895

chainlen += 1

899

chainlen += 1

896

compresseddeltalen += deltalen

900

compresseddeltalen += deltalen

897

901

898

revlog = self.revlog

902

revlog = self.revlog

899

snapshotdepth = None

903

snapshotdepth = None

900

if deltabase == nullrev:

904

if deltabase == nullrev:

901

snapshotdepth = 0

905

snapshotdepth = 0

902

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

906

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

903

# A delta chain should always be one full snapshot,

907

# A delta chain should always be one full snapshot,

904

# zero or more semi-snapshots, and zero or more deltas

908

# zero or more semi-snapshots, and zero or more deltas

905

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

909

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

906

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

910

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

907

snapshotdepth = len(revlog._deltachain(deltabase)[0])

911

snapshotdepth = len(revlog._deltachain(deltabase)[0])

908

912

909

return _deltainfo(dist, deltalen, (header, data), deltabase,

913

return _deltainfo(dist, deltalen, (header, data), deltabase,

910

chainbase, chainlen, compresseddeltalen,

914

chainbase, chainlen, compresseddeltalen,

911

snapshotdepth)

915

snapshotdepth)

912

916

913

def _fullsnapshotinfo(self, fh, revinfo):

917

def _fullsnapshotinfo(self, fh, revinfo):

914

curr = len(self.revlog)

918

curr = len(self.revlog)

915

rawtext = self.buildtext(revinfo, fh)

919

rawtext = self.buildtext(revinfo, fh)

916

data = self.revlog.compress(rawtext)

920

data = self.revlog.compress(rawtext)

917

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

921

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

918

deltabase = chainbase = curr

922

deltabase = chainbase = curr

919

snapshotdepth = 0

923

snapshotdepth = 0

920

chainlen = 1

924

chainlen = 1

921

925

922

return _deltainfo(dist, deltalen, data, deltabase,

926

return _deltainfo(dist, deltalen, data, deltabase,

923

chainbase, chainlen, compresseddeltalen,

927

chainbase, chainlen, compresseddeltalen,

924

snapshotdepth)

928

snapshotdepth)

925

929

926

def finddeltainfo(self, revinfo, fh):

930

def finddeltainfo(self, revinfo, fh):

927

"""Find an acceptable delta against a candidate revision

931

"""Find an acceptable delta against a candidate revision

928

932

929

revinfo: information about the revision (instance of _revisioninfo)

933

revinfo: information about the revision (instance of _revisioninfo)

930

fh: file handle to either the .i or the .d revlog file,

934

fh: file handle to either the .i or the .d revlog file,

931

depending on whether it is inlined or not

935

depending on whether it is inlined or not

932

936

933

Returns the first acceptable candidate revision, as ordered by

937

Returns the first acceptable candidate revision, as ordered by

934

_candidategroups

938

_candidategroups

935

939

936

If no suitable deltabase is found, we return delta info for a full

940

If no suitable deltabase is found, we return delta info for a full

937

snapshot.

941

snapshot.

938

"""

942

"""

939

if not revinfo.textlen:

943

if not revinfo.textlen:

940

return self._fullsnapshotinfo(fh, revinfo)

944

return self._fullsnapshotinfo(fh, revinfo)

941

945

942

# no delta for flag processor revision (see "candelta" for why)

946

# no delta for flag processor revision (see "candelta" for why)

943

# not calling candelta since only one revision needs test, also to

947

# not calling candelta since only one revision needs test, also to

944

# avoid overhead fetching flags again.

948

# avoid overhead fetching flags again.

945

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

949

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

946

return self._fullsnapshotinfo(fh, revinfo)

950

return self._fullsnapshotinfo(fh, revinfo)

947

951

948

cachedelta = revinfo.cachedelta

952

cachedelta = revinfo.cachedelta

949

p1 = revinfo.p1

953

p1 = revinfo.p1

950

p2 = revinfo.p2

954

p2 = revinfo.p2

951

revlog = self.revlog

955

revlog = self.revlog

952

956

953

deltainfo = None

957

deltainfo = None

954

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

958

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

955

groups = _candidategroups(self.revlog, revinfo.textlen,

959

groups = _candidategroups(self.revlog, revinfo.textlen,

956

p1r, p2r, cachedelta)

960

p1r, p2r, cachedelta)

957

candidaterevs = next(groups)

961

candidaterevs = next(groups)

958

while candidaterevs is not None:

962

while candidaterevs is not None:

959

nominateddeltas = []

963

nominateddeltas = []

960

if deltainfo is not None:

964

if deltainfo is not None:

961

# if we already found a good delta,

965

# if we already found a good delta,

962

# challenge it against refined candidates

966

# challenge it against refined candidates

963

nominateddeltas.append(deltainfo)

967

nominateddeltas.append(deltainfo)

964

for candidaterev in candidaterevs:

968

for candidaterev in candidaterevs:

965

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

969

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

966

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

970

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

967

nominateddeltas.append(candidatedelta)

971

nominateddeltas.append(candidatedelta)

968

if nominateddeltas:

972

if nominateddeltas:

969

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

973

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

970

if deltainfo is not None:

974

if deltainfo is not None:

971

candidaterevs = groups.send(deltainfo.base)

975

candidaterevs = groups.send(deltainfo.base)

972

else:

976

else:

973

candidaterevs = next(groups)

977

candidaterevs = next(groups)

974

978

975

if deltainfo is None:

979

if deltainfo is None:

976

deltainfo = self._fullsnapshotinfo(fh, revinfo)

980

deltainfo = self._fullsnapshotinfo(fh, revinfo)

977

return deltainfo

981

return deltainfo

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlogdeltas.py - Logic around delta computation for revlog
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             # Copyright 2018 Octobus <contact@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Helper class to compute deltas stored inside revlogs"""
             from __future__ import absolute_import
             import collections
             import struct
             # import stuff from node for others to import from revlog
             from ..node import (
                 nullrev,
             )
             from ..i18n import _
             from .constants import (
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 error,
                 mdiff,
             )
             # maximum <delta-chain-data>/<revision-text-length> ratio
             LIMIT_DELTA2TEXT = 2
             class _testrevlog(object):
                 """minimalist fake revlog to use in doctests"""
                 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
                     """data is an list of revision payload boundaries"""
                     self._data = data
                     self._srdensitythreshold = density
                     self._srmingapsize = mingap
                     self._snapshot = set(snapshot)
+                    self.index = None
                 def start(self, rev):
                     if rev == 0:
                         return 0
                     return self._data[rev - 1]
                 def end(self, rev):
                     return self._data[rev]
                 def length(self, rev):
                     return self.end(rev) - self.start(rev)
                 def __len__(self):
                     return len(self._data)
                 def issnapshot(self, rev):
                     return rev in self._snapshot
             def slicechunk(revlog, revs, targetsize=None):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
                 `revlog._srmingapsize` is skipped.
                 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
                 For consistency with other slicing choice, this limit won't go lower than
                 `revlog._srmingapsize`.
                 If individual revisions chunk are larger than this limit, they will still
                 be raised individually.
                 >>> data = [
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ]
                 >>> revlog = _testrevlog(data, snapshot=range(16))
                 >>> list(slicechunk(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(slicechunk(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 Slicing with a maximum chunk size
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
                 [[0], [11], [13], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
                 [[0], [11], [13, 15]]
                 """
                 if targetsize is not None:
                     targetsize = max(targetsize, revlog._srmingapsize)
                 # targetsize should not be specified when evaluating delta candidates:
                 # * targetsize is used to ensure we stay within specification when reading,
-                for chunk in _slicechunktodensity(revlog, revs,
+                densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
-                                                  revlog._srdensitythreshold,
+                if densityslicing is None:
-                                                  revlog._srmingapsize):
+                    densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
+                for chunk in densityslicing(revs,
+                                            revlog._srdensitythreshold,
+                                            revlog._srmingapsize):
                     for subchunk in _slicechunktosize(revlog, chunk, targetsize):
                         yield subchunk
             def _slicechunktosize(revlog, revs, targetsize=None):
                 """slice revs to match the target size
                 This is intended to be used on chunk that density slicing selected by that
                 are still too large compared to the read garantee of revlog. This might
                 happens when "minimal gap size" interrupted the slicing or when chain are
                 built in a way that create large blocks next to each other.
                 >>> data = [
                 ...  3,  #0 (3)
                 ...  5,  #1 (2)
                 ...  6,  #2 (1)
                 ...  8,  #3 (2)
                 ...  8,  #4 (empty)
                 ...  11, #5 (3)
                 ...  12, #6 (1)
                 ...  13, #7 (1)
                 ...  14, #8 (1)
                 ... ]
                 == All snapshots cases ==
                 >>> revlog = _testrevlog(data, snapshot=range(9))
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1, 2], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 == No Snapshot cases ==
                 >>> revlog = _testrevlog(data)
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1], [2, 3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [4, 5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 == mixed case ==
                 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
                 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
                 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
                 """
                 assert targetsize is None or 0 <= targetsize
                 startdata = revlog.start(revs[0])
                 enddata = revlog.end(revs[-1])
                 fullspan = enddata - startdata
                 if targetsize is None or fullspan <= targetsize:
                     yield revs
                     return
                 startrevidx = 0
                 endrevidx = 1
                 iterrevs = enumerate(revs)
                 next(iterrevs) # skip first rev.
                 # first step: get snapshots out of the way
                 for idx, r in iterrevs:
                     span = revlog.end(r) - startdata
                     snapshot = revlog.issnapshot(r)
                     if span <= targetsize and snapshot:
                         endrevidx = idx + 1
                     else:
                         chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                         if chunk:
                             yield chunk
                         startrevidx = idx
                         startdata = revlog.start(r)
                         endrevidx = idx + 1
                     if not snapshot:
                         break
                 # for the others, we use binary slicing to quickly converge toward valid
                 # chunks (otherwise, we might end up looking for start/end of many
                 # revisions). This logic is not looking for the perfect slicing point, it
                 # focuses on quickly converging toward valid chunks.
                 nbitem = len(revs)
                 while (enddata - startdata) > targetsize:
                     endrevidx = nbitem
                     if nbitem - startrevidx <= 1:
                         break # protect against individual chunk larger than limit
                     localenddata = revlog.end(revs[endrevidx - 1])
                     span = localenddata - startdata
                     while span > targetsize:
                         if endrevidx - startrevidx <= 1:
                             break # protect against individual chunk larger than limit
                         endrevidx -= (endrevidx - startrevidx) // 2
                         localenddata = revlog.end(revs[endrevidx - 1])
                         span = localenddata - startdata
                     chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                     if chunk:
                         yield chunk
                     startrevidx = endrevidx
                     startdata = revlog.start(revs[startrevidx])
                 chunk = _trimchunk(revlog, revs, startrevidx)
                 if chunk:
                     yield chunk
             def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                                      mingapsize=0):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
                 skipped.
                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])
                 >>> list(_slicechunktodensity(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(_slicechunktodensity(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           mingapsize=20))
                 [[1, 2, 3, 5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95))
                 [[1, 2], [5], [8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95, mingapsize=12))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 """
                 start = revlog.start
                 length = revlog.length
                 if len(revs) <= 1:
                     yield revs
                     return
                 deltachainspan = segmentspan(revlog, revs)
                 if deltachainspan < mingapsize:
                     yield revs
                     return
                 readdata = deltachainspan
                 chainpayload = sum(length(r) for r in revs)
                 if deltachainspan:
                     density = chainpayload / float(deltachainspan)
                 else:
                     density = 1.0
                 if density >= targetdensity:
                     yield revs
                     return
                 # Store the gaps in a heap to have them sorted by decreasing size
                 gaps = []
                 prevend = None
                 for i, rev in enumerate(revs):
                     revstart = start(rev)
                     revlen = length(rev)
                     # Skip empty revisions to form larger holes
                     if revlen == 0:
                         continue
                     if prevend is not None:
                         gapsize = revstart - prevend
                         # only consider holes that are large enough
                         if gapsize > mingapsize:
                             gaps.append((gapsize, i))
                     prevend = revstart + revlen
                 # sort the gaps to pop them from largest to small
                 gaps.sort()
                 # Collect the indices of the largest holes until the density is acceptable
                 selected = []
                 while gaps and density < targetdensity:
                     gapsize, gapidx = gaps.pop()
                     selected.append(gapidx)
                     # the gap sizes are stored as negatives to be sorted decreasingly
                     # by the heap
                     readdata -= gapsize
                     if readdata > 0:
                         density = chainpayload / float(readdata)
                     else:
                         density = 1.0
                 selected.sort()
                 # Cut the revs at collected indices
                 previdx = 0
                 for idx in selected:
                     chunk = _trimchunk(revlog, revs, previdx, idx)
                     if chunk:
                         yield chunk
                     previdx = idx
                 chunk = _trimchunk(revlog, revs, previdx)
                 if chunk:
                     yield chunk
             def _trimchunk(revlog, revs, startidx, endidx=None):
                 """returns revs[startidx:endidx] without empty trailing revs
                 Doctest Setup
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ...  21, #5
                 ...  21, #6 (empty)
                 ... ])
                 Contiguous cases:
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
                 [0, 1, 2, 3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
                 [0, 1, 2, 3, 4]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
                 [0, 1, 2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
                 [2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
                 [3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
                 [3, 4]
                 Discontiguous cases:
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
                 [1, 3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
                 [1]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
                 [3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
                 [3, 5]
                 """
                 length = revlog.length
                 if endidx is None:
                     endidx = len(revs)
                 # If we have a non-emtpy delta candidate, there are nothing to trim
                 if revs[endidx - 1] < len(revlog):
                     # Trim empty revs at the end, except the very first revision of a chain
                     while (endidx > 1
                             and endidx > startidx
                             and length(revs[endidx - 1]) == 0):
                         endidx -= 1
                 return revs[startidx:endidx]
             def segmentspan(revlog, revs):
                 """Get the byte span of a segment of revisions
                 revs is a sorted array of revision numbers
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ... ])
                 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
                 >>> segmentspan(revlog, [0, 4])
                 >>> segmentspan(revlog, [3, 4])
                 >>> segmentspan(revlog, [1, 2, 3,])
                 >>> segmentspan(revlog, [1, 3])
                 """
                 if not revs:
                     return 0
                 end = revlog.end(revs[-1])
                 return end - revlog.start(revs[0])
             def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
                 """build full text from a (base, delta) pair and other metadata"""
                 # special case deltas which replace entire base; no need to decode
                 # base revision. this neatly avoids censored bases, which throw when
                 # they're decoded.
                 hlen = struct.calcsize(">lll")
                 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                            len(delta) - hlen):
                     fulltext = delta[hlen:]
                 else:
                     # deltabase is rawtext before changed by flag processors, which is
                     # equivalent to non-raw text
                     basetext = revlog.revision(baserev, _df=fh, raw=False)
                     fulltext = mdiff.patch(basetext, delta)
                 try:
                     res = revlog._processflags(fulltext, flags, 'read', raw=True)
                     fulltext, validatehash = res
                     if validatehash:
                         revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
                     if flags & REVIDX_ISCENSORED:
                         raise error.StorageError(_('node %s is not censored') %
                                                  expectednode)
                 except error.CensoredNodeError:
                     # must pass the censored index flag to add censored revisions
                     if not flags & REVIDX_ISCENSORED:
                         raise
                 return fulltext
             @attr.s(slots=True, frozen=True)
             class _deltainfo(object):
                 distance = attr.ib()
                 deltalen = attr.ib()
                 data = attr.ib()
                 base = attr.ib()
                 chainbase = attr.ib()
                 chainlen = attr.ib()
                 compresseddeltalen = attr.ib()
                 snapshotdepth = attr.ib()
             def isgooddeltainfo(revlog, deltainfo, revinfo):
                 """Returns True if the given delta is good. Good means that it is within
                 the disk span, disk size, and chain length bounds that we know to be
                 performant."""
                 if deltainfo is None:
                     return False
                 # - 'deltainfo.distance' is the distance from the base revision --
                 #   bounding it limits the amount of I/O we need to do.
                 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
                 #   deltas we need to apply -- bounding it limits the amount of CPU
                 #   we consume.
                 textlen = revinfo.textlen
                 defaultmax = textlen * 4
                 maxdist = revlog._maxdeltachainspan
                 if not maxdist:
                     maxdist = deltainfo.distance # ensure the conditional pass
                 maxdist = max(maxdist, defaultmax)
                 # Bad delta from read span:
                 #
                 #   If the span of data read is larger than the maximum allowed.
                 #
                 #   In the sparse-revlog case, we rely on the associated "sparse reading"
                 #   to avoid issue related to the span of data. In theory, it would be
                 #   possible to build pathological revlog where delta pattern would lead
                 #   to too many reads. However, they do not happen in practice at all. So
                 #   we skip the span check entirely.
                 if not revlog._sparserevlog and maxdist < deltainfo.distance:
                     return False
                 # Bad delta from new delta size:
                 #
                 #   If the delta size is larger than the target text, storing the
                 #   delta will be inefficient.
                 if textlen < deltainfo.deltalen:
                     return False
                 # Bad delta from cumulated payload size:
                 #
                 #   If the sum of delta get larger than K * target text length.
                 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
                     return False
                 # Bad delta from chain length:
                 #
                 #   If the number of delta in the chain gets too high.
                 if (revlog._maxchainlen
                         and revlog._maxchainlen < deltainfo.chainlen):
                     return False
                 # bad delta from intermediate snapshot size limit
                 #
                 #   If an intermediate snapshot size is higher than the limit.  The
                 #   limit exist to prevent endless chain of intermediate delta to be
                 #   created.
                 if (deltainfo.snapshotdepth is not None and
                         (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
                     return False
                 # bad delta if new intermediate snapshot is larger than the previous
                 # snapshot
                 if (deltainfo.snapshotdepth
                         and revlog.length(deltainfo.base) < deltainfo.deltalen):
                     return False
                 return True
             def _candidategroups(revlog, textlen, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base
                 This top level function focus on emitting groups with unique and worthwhile
                 content. See _raw_candidate_groups for details about the group order.
                 """
                 # should we try to build a delta?
                 if not (len(revlog) and revlog._storedeltachains):
                     yield None
                     return
                 deltalength = revlog.length
                 deltaparent = revlog.deltaparent
                 good = None
                 deltas_limit = textlen * LIMIT_DELTA2TEXT
                 tested = set([nullrev])
                 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
                 while True:
                     temptative = candidates.send(good)
                     if temptative is None:
                         break
                     group = []
                     for rev in temptative:
                         # skip over empty delta (no need to include them in a chain)
                         while (revlog._generaldelta
                                and not (rev == nullrev
                                         or rev in tested
                                         or deltalength(rev))):
                             tested.add(rev)
                             rev = deltaparent(rev)
                         # filter out revision we tested already
                         if rev in tested:
                             continue
                         tested.add(rev)
                         # filter out delta base that will never produce good delta
                         if deltas_limit < revlog.length(rev):
                             continue
                         # no need to try a delta against nullrev, this will be done as a
                         # last resort.
                         if rev == nullrev:
                             continue
                         # no delta for rawtext-changing revs (see "candelta" for why)
                         if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                             continue
                         group.append(rev)
                     if group:
                         # XXX: in the sparse revlog case, group can become large,
                         #      impacting performances. Some bounding or slicing mecanism
                         #      would help to reduce this impact.
                         good = yield tuple(group)
                 yield None
             def _findsnapshots(revlog, cache, start_rev):
                 """find snapshot from start_rev to tip"""
                 deltaparent = revlog.deltaparent
                 issnapshot = revlog.issnapshot
                 for rev in revlog.revs(start_rev):
                     if issnapshot(rev):
                         cache[deltaparent(rev)].append(rev)
             def _refinedgroups(revlog, p1, p2, cachedelta):
                 good = None
                 # First we try to reuse a the delta contained in the bundle.
                 # (or from the source revlog)
                 #
                 # This logic only applies to general delta repositories and can be disabled
                 # through configuration. Disabling reuse source delta is useful when
                 # we want to make sure we recomputed "optimal" deltas.
                 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                     # Assume what we received from the server is a good choice
                     # build delta will reuse the cache
                     good = yield (cachedelta[0],)
                     if good is not None:
                         yield None
                         return
                 for candidates in _rawgroups(revlog, p1, p2, cachedelta):
                     good = yield candidates
                     if good is not None:
                         break
                 # If sparse revlog is enabled, we can try to refine the available deltas
                 if not revlog._sparserevlog:
                     yield None
                     return
                 # if we have a refinable value, try to refine it
                 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
                     # refine snapshot down
                     previous = None
                     while previous != good:
                         previous = good
                         base = revlog.deltaparent(good)
                         if base == nullrev:
                             break
                         good = yield (base,)
                     # refine snapshot up
                     #
                     # XXX the _findsnapshots call can be expensive and is "duplicated" with
                     # the one done in `_rawgroups`. Once we start working on performance,
                     # we should make the two logics share this computation.
                     snapshots = collections.defaultdict(list)
                     _findsnapshots(revlog, snapshots, good + 1)
                     previous = None
                     while good != previous:
                         previous = good
                         children = tuple(sorted(c for c in snapshots[good]))
                         good = yield children
                 # we have found nothing
                 yield None
             def _rawgroups(revlog, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base
                 This lower level function focus on emitting delta theorically interresting
                 without looking it any practical details.
                 The group order aims at providing fast or small candidates first.
                 """
                 gdelta = revlog._generaldelta
                 sparse = revlog._sparserevlog
                 curr = len(revlog)
                 prev = curr - 1
                 deltachain = lambda rev: revlog._deltachain(rev)[0]
                 if gdelta:
                     # exclude already lazy tested base if any
                     parents = [p for p in (p1, p2) if p != nullrev]
                     if not revlog._deltabothparents and len(parents) == 2:
                         parents.sort()
                         # To minimize the chance of having to build a fulltext,
                         # pick first whichever parent is closest to us (max rev)
                         yield (parents[1],)
                         # then the other one (min rev) if the first did not fit
                         yield (parents[0],)
                     elif len(parents) > 0:
                         # Test all parents (1 or 2), and keep the best candidate
                         yield parents
                 if sparse and parents:
                     snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
                     # See if we can use an existing snapshot in the parent chains to use as
                     # a base for a new intermediate-snapshot
                     #
                     # search for snapshot in parents delta chain
                     # map: snapshot-level: snapshot-rev
                     parents_snaps = collections.defaultdict(set)
                     candidate_chains = [deltachain(p) for p in parents]
                     for chain in candidate_chains:
                         for idx, s in enumerate(chain):
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     snapfloor = min(parents_snaps[0]) + 1
                     _findsnapshots(revlog, snapshots, snapfloor)
                     # search for the highest "unrelated" revision
                     #
                     # Adding snapshots used by "unrelated" revision increase the odd we
                     # reuse an independant, yet better snapshot chain.
                     #
                     # XXX instead of building a set of revisions, we could lazily enumerate
                     # over the chains. That would be more efficient, however we stick to
                     # simple code for now.
                     all_revs = set()
                     for chain in candidate_chains:
                         all_revs.update(chain)
                     other = None
                     for r in revlog.revs(prev, snapfloor):
                         if r not in all_revs:
                             other = r
                             break
                     if other is not None:
                         # To avoid unfair competition, we won't use unrelated intermediate
                         # snapshot that are deeper than the ones from the parent delta
                         # chain.
                         max_depth = max(parents_snaps.keys())
                         chain = deltachain(other)
                         for idx, s in enumerate(chain):
                             if s < snapfloor:
                                 continue
                             if max_depth < idx:
                                 break
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     # Test them as possible intermediate snapshot base
                     # We test them from highest to lowest level. High level one are more
                     # likely to result in small delta
                     floor = None
                     for idx, snaps in sorted(parents_snaps.items(), reverse=True):
                         siblings = set()
                         for s in snaps:
                             siblings.update(snapshots[s])
                         # Before considering making a new intermediate snapshot, we check
                         # if an existing snapshot, children of base we consider, would be
                         # suitable.
                         #
                         # It give a change to reuse a delta chain "unrelated" to the
                         # current revision instead of starting our own. Without such
                         # re-use, topological branches would keep reopening new chains.
                         # Creating more and more snapshot as the repository grow.
                         if floor is not None:
                             # We only do this for siblings created after the one in our
                             # parent's delta chain. Those created before has less chances
                             # to be valid base since our ancestors had to create a new
                             # snapshot.
                             siblings = [r for r in siblings if floor < r]
                         yield tuple(sorted(siblings))
                         # then test the base from our parent's delta chain.
                         yield tuple(sorted(snaps))
                         floor = min(snaps)
                     # No suitable base found in the parent chain, search if any full
                     # snapshots emitted since parent's base would be a suitable base for an
                     # intermediate snapshot.
                     #
                     # It give a chance to reuse a delta chain unrelated to the current
                     # revisions instead of starting our own. Without such re-use,
                     # topological branches would keep reopening new full chains. Creating
                     # more and more snapshot as the repository grow.
                     yield tuple(snapshots[nullrev])
                 if not sparse:
                     # other approach failed try against prev to hopefully save us a
                     # fulltext.
                     yield (prev,)
             class deltacomputer(object):
                 def __init__(self, revlog):
                     self.revlog = revlog
                 def buildtext(self, revinfo, fh):
                     """Builds a fulltext version of a revision
                     revinfo: _revisioninfo instance that contains all needed info
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     """
                     btext = revinfo.btext
                     if btext[0] is not None:
                         return btext[0]
                     revlog = self.revlog
                     cachedelta = revinfo.cachedelta
                     baserev = cachedelta[0]
                     delta = cachedelta[1]
                     fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                                          revinfo.p1, revinfo.p2,
                                                          revinfo.flags, revinfo.node)
                     return fulltext
                 def _builddeltadiff(self, base, revinfo, fh):
                     revlog = self.revlog
                     t = self.buildtext(revinfo, fh)
                     if revlog.iscensored(base):
                         # deltas based on a censored revision must replace the
                         # full content in one patch, so delta works everywhere
                         header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
                         delta = header + t
                     else:
                         ptext = revlog.revision(base, _df=fh, raw=True)
                         delta = mdiff.textdiff(ptext, t)
                     return delta
                 def _builddeltainfo(self, revinfo, base, fh):
                     # can we use the cached delta?
                     delta = None
                     if revinfo.cachedelta:
                         cachebase, cachediff = revinfo.cachedelta
                         #check if the diff still apply
                         currentbase = cachebase
                         while (currentbase != nullrev
                                 and currentbase != base
                                 and self.revlog.length(currentbase) == 0):
                             currentbase = self.revlog.deltaparent(currentbase)
                         if currentbase == base:
                             delta = revinfo.cachedelta[1]
                     if delta is None:
                         delta = self._builddeltadiff(base, revinfo, fh)
                     revlog = self.revlog
                     header, data = revlog.compress(delta)
                     deltalen = len(header) + len(data)
                     chainbase = revlog.chainbase(base)
                     offset = revlog.end(len(revlog) - 1)
                     dist = deltalen + offset - revlog.start(chainbase)
                     if revlog._generaldelta:
                         deltabase = base
                     else:
                         deltabase = chainbase
                     chainlen, compresseddeltalen = revlog._chaininfo(base)
                     chainlen += 1
                     compresseddeltalen += deltalen
                     revlog = self.revlog
                     snapshotdepth = None
                     if deltabase == nullrev:
                         snapshotdepth = 0
                     elif revlog._sparserevlog and revlog.issnapshot(deltabase):
                         # A delta chain should always be one full snapshot,
                         # zero or more semi-snapshots, and zero or more deltas
                         p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
                         if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                             snapshotdepth = len(revlog._deltachain(deltabase)[0])
                     return _deltainfo(dist, deltalen, (header, data), deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)
                 def _fullsnapshotinfo(self, fh, revinfo):
                     curr = len(self.revlog)
                     rawtext = self.buildtext(revinfo, fh)
                     data = self.revlog.compress(rawtext)
                     compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
                     deltabase = chainbase = curr
                     snapshotdepth = 0
                     chainlen = 1
                     return _deltainfo(dist, deltalen, data, deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)
                 def finddeltainfo(self, revinfo, fh):
                     """Find an acceptable delta against a candidate revision
                     revinfo: information about the revision (instance of _revisioninfo)
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     Returns the first acceptable candidate revision, as ordered by
                     _candidategroups
                     If no suitable deltabase is found, we return delta info for a full
                     snapshot.
                     """
                     if not revinfo.textlen:
                         return self._fullsnapshotinfo(fh, revinfo)
                     # no delta for flag processor revision (see "candelta" for why)
                     # not calling candelta since only one revision needs test, also to
                     # avoid overhead fetching flags again.
                     if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
                         return self._fullsnapshotinfo(fh, revinfo)
                     cachedelta = revinfo.cachedelta
                     p1 = revinfo.p1
                     p2 = revinfo.p2
                     revlog = self.revlog
                     deltainfo = None
                     p1r, p2r = revlog.rev(p1), revlog.rev(p2)
                     groups = _candidategroups(self.revlog, revinfo.textlen,
                                                          p1r, p2r, cachedelta)
                     candidaterevs = next(groups)
                     while candidaterevs is not None:
                         nominateddeltas = []
                         if deltainfo is not None:
                             # if we already found a good delta,
                             # challenge it against refined candidates
                             nominateddeltas.append(deltainfo)
                         for candidaterev in candidaterevs:
                             candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                             if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                                 nominateddeltas.append(candidatedelta)
                         if nominateddeltas:
                             deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                         if deltainfo is not None:
                             candidaterevs = groups.send(deltainfo.base)
                         else:
                             candidaterevs = next(groups)
                     if deltainfo is None:
                         deltainfo = self._fullsnapshotinfo(fh, revinfo)
                     return deltainfo