##// END OF EJS Templates
delta-find: move is_good_delta_info on the _DeltaSearch class...
marmoute -
r52224:7455cae6 default
parent child Browse files
Show More
@@ -1,1676 +1,1678 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        """return the start offset of a revision's payload"""
        # both the null revision and revision 0 begin at offset zero
        if rev == nullrev or rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        """return the end offset of a revision's payload"""
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        """return the payload size of a revision"""
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        """True if `rev` was declared a snapshot at construction time"""
        # the null revision is always considered a snapshot
        return rev == nullrev or rev in self._snapshot
83
83
84
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    min_gap = revlog.data_config.sr_min_gap_size
    if targetsize is not None:
        targetsize = max(targetsize, min_gap)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    density_slicer = getattr(revlog.index, 'slicechunktodensity', None)
    if density_slicer is None:
        # no native implementation available, use the pure-python fallback
        def density_slicer(r, density, gap):
            return _slicechunktodensity(revlog, r, density, gap)

    density_groups = density_slicer(
        revs,
        revlog.data_config.sr_density_threshold,
        min_gap,
    )
    for group in density_groups:
        # further split each density-based group to honor `targetsize`
        yield from _slicechunktosize(revlog, group, targetsize)
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    start_data = revlog.start(revs[0])
    end_data = revlog.end(revs[-1])
    full_span = end_data - start_data
    # fast path: the whole segment already fits within the target size
    if targetsize is None or full_span <= targetsize:
        yield revs
        return

    start_idx = 0
    end_idx = 1
    rev_iter = enumerate(revs)
    next(rev_iter)  # the first rev always opens the current chunk
    # first step: get snapshots out of the way
    for idx, rev in rev_iter:
        span = revlog.end(rev) - start_data
        is_snapshot = revlog.issnapshot(rev)
        if span <= targetsize and is_snapshot:
            # still within budget: extend the current chunk
            end_idx = idx + 1
        else:
            # budget exceeded (or non-snapshot reached): flush what we have
            chunk = _trimchunk(revlog, revs, start_idx, end_idx)
            if chunk:
                yield chunk
            start_idx = idx
            start_data = revlog.start(rev)
            end_idx = idx + 1
        if not is_snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nb_item = len(revs)
    while (end_data - start_data) > targetsize:
        end_idx = nb_item
        if nb_item - start_idx <= 1:
            break  # protect against individual chunk larger than limit
        local_end_data = revlog.end(revs[end_idx - 1])
        span = local_end_data - start_data
        while span > targetsize:
            if end_idx - start_idx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until it fits
            end_idx -= (end_idx - start_idx) // 2
            local_end_data = revlog.end(revs[end_idx - 1])
            span = local_end_data - start_data
        chunk = _trimchunk(revlog, revs, start_idx, end_idx)
        if chunk:
            yield chunk
        start_idx = end_idx
        start_data = revlog.start(revs[start_idx])

    # flush whatever is left at the tail
    chunk = _trimchunk(revlog, revs, start_idx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) is trivially dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions as
    # (size, index) pairs; only gaps above `mingapsize` are candidates.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # ascending sort, so popping from the end yields the largest gap first
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # dropping this gap shrinks the data we would actually read
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:
        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # distance from the start of the first rev to the end of the last one
    return revlog.end(revs[-1]) - revlog.start(revs[0])
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    header_len = struct.calcsize(b">lll")
    full_replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - header_len
    )
    if delta[:header_len] == full_replace_header:
        # the delta body IS the new full text
        fulltext = delta[header_len:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable record describing a candidate delta for a revision."""

    # distance from the delta base revision -- bounds the I/O needed to
    # restore the text (see is_good_delta_info)
    distance = attr.ib()
    # length of the (possibly compressed) delta payload
    deltalen = attr.ib()
    # the delta payload itself, as a (header, data) pair of bytes
    data = attr.ib()
    # revision number used as the delta base
    base = attr.ib()
    # revision number starting the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth when the delta is an intermediate snapshot, else None
    # (see the `snapshotdepth` checks in is_good_delta_info)
    snapshotdepth = attr.ib()
567
567
568
568
def drop_u_compression(delta):
    """turn a "u" (no-compression) delta into no-compression without header

    This is useful for revlog formats that have a better compression method.

    Returns a new _deltainfo whose data drops the leading b'u' marker and
    whose deltalen is reduced accordingly; all other fields are unchanged.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
585
585
586
586
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
    # we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        revinfo.cachedelta is None
        or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog.delta_config.max_deltachain_span
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if (
        revlog.delta_config.max_chain_len
        and revlog.delta_config.max_chain_len < deltainfo.chainlen
    ):
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
670
671
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
676
591
677
592
678 class _DeltaSearch:
593 class _DeltaSearch:
679 """perform the search of a good delta for a single revlog revision
594 """perform the search of a good delta for a single revlog revision
680
595
681 note: some of the deltacomputer.finddeltainfo logic should probably move
596 note: some of the deltacomputer.finddeltainfo logic should probably move
682 here.
597 here.
683 """
598 """
684
599
685 def __init__(
600 def __init__(
686 self,
601 self,
687 revlog,
602 revlog,
688 revinfo,
603 revinfo,
689 p1,
604 p1,
690 p2,
605 p2,
691 cachedelta,
606 cachedelta,
692 excluded_bases=None,
607 excluded_bases=None,
693 target_rev=None,
608 target_rev=None,
694 snapshot_cache=None,
609 snapshot_cache=None,
695 ):
610 ):
696 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
697 # so we should never end up asking such question. Adding the assert as
612 # so we should never end up asking such question. Adding the assert as
698 # a safe-guard to detect anything that would be fishy in this regard.
613 # a safe-guard to detect anything that would be fishy in this regard.
699 assert (
614 assert (
700 cachedelta is None
615 cachedelta is None
701 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
702 or not revlog.delta_config.general_delta
617 or not revlog.delta_config.general_delta
703 )
618 )
704 self.revlog = revlog
619 self.revlog = revlog
705 self.revinfo = revinfo
620 self.revinfo = revinfo
706 self.textlen = revinfo.textlen
621 self.textlen = revinfo.textlen
707 self.p1 = p1
622 self.p1 = p1
708 self.p2 = p2
623 self.p2 = p2
709 self.cachedelta = cachedelta
624 self.cachedelta = cachedelta
710 self.excluded_bases = excluded_bases
625 self.excluded_bases = excluded_bases
711 if target_rev is None:
626 if target_rev is None:
712 self.target_rev = len(self.revlog)
627 self.target_rev = len(self.revlog)
713 self.target_rev = target_rev
628 self.target_rev = target_rev
714 if snapshot_cache is None:
629 if snapshot_cache is None:
715 # map: base-rev: [snapshot-revs]
630 # map: base-rev: [snapshot-revs]
716 snapshot_cache = SnapshotCache()
631 snapshot_cache = SnapshotCache()
717 self.snapshot_cache = snapshot_cache
632 self.snapshot_cache = snapshot_cache
718
633
719 self.tested = {nullrev}
634 self.tested = {nullrev}
720
635
721 def candidate_groups(self):
636 def candidate_groups(self):
722 """Provides group of revision to be tested as delta base
637 """Provides group of revision to be tested as delta base
723
638
724 This top level function focus on emitting groups with unique and
639 This top level function focus on emitting groups with unique and
725 worthwhile content. See _raw_candidate_groups for details about the
640 worthwhile content. See _raw_candidate_groups for details about the
726 group order.
641 group order.
727 """
642 """
728 # should we try to build a delta?
643 # should we try to build a delta?
729 if not (len(self.revlog) and self.revlog._storedeltachains):
644 if not (len(self.revlog) and self.revlog._storedeltachains):
730 yield None
645 yield None
731 return
646 return
732
647
733 if not self.revlog.delta_config.general_delta:
648 if not self.revlog.delta_config.general_delta:
734 # before general delta, there is only one possible delta base
649 # before general delta, there is only one possible delta base
735 yield (self.target_rev - 1,)
650 yield (self.target_rev - 1,)
736 yield None
651 yield None
737 return
652 return
738
653
739 deltalength = self.revlog.length
654 deltalength = self.revlog.length
740 deltaparent = self.revlog.deltaparent
655 deltaparent = self.revlog.deltaparent
741 sparse = self.revlog.delta_config.sparse_revlog
656 sparse = self.revlog.delta_config.sparse_revlog
742 good = None
657 good = None
743
658
744 deltas_limit = self.textlen * LIMIT_DELTA2TEXT
659 deltas_limit = self.textlen * LIMIT_DELTA2TEXT
745 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
660 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
746
661
747 tested = self.tested # prefetch for speed and code compactness
662 tested = self.tested # prefetch for speed and code compactness
748 candidates = self._refined_groups()
663 candidates = self._refined_groups()
749 while True:
664 while True:
750 temptative = candidates.send(good)
665 temptative = candidates.send(good)
751 if temptative is None:
666 if temptative is None:
752 break
667 break
753 group = []
668 group = []
754 for rev in temptative:
669 for rev in temptative:
755 # skip over empty delta (no need to include them in a chain)
670 # skip over empty delta (no need to include them in a chain)
756 while not (rev == nullrev or rev in tested or deltalength(rev)):
671 while not (rev == nullrev or rev in tested or deltalength(rev)):
757 tested.add(rev)
672 tested.add(rev)
758 rev = deltaparent(rev)
673 rev = deltaparent(rev)
759 # no need to try a delta against nullrev, this will be done as
674 # no need to try a delta against nullrev, this will be done as
760 # a last resort.
675 # a last resort.
761 if rev == nullrev:
676 if rev == nullrev:
762 continue
677 continue
763 # filter out revision we tested already
678 # filter out revision we tested already
764 if rev in tested:
679 if rev in tested:
765 continue
680 continue
766
681
767 # an higher authority deamed the base unworthy (e.g. censored)
682 # an higher authority deamed the base unworthy (e.g. censored)
768 if (
683 if (
769 self.excluded_bases is not None
684 self.excluded_bases is not None
770 and rev in self.excluded_bases
685 and rev in self.excluded_bases
771 ):
686 ):
772 tested.add(rev)
687 tested.add(rev)
773 continue
688 continue
774 # We are in some recomputation cases and that rev is too high
689 # We are in some recomputation cases and that rev is too high
775 # in the revlog
690 # in the revlog
776 if self.target_rev is not None and rev >= self.target_rev:
691 if self.target_rev is not None and rev >= self.target_rev:
777 tested.add(rev)
692 tested.add(rev)
778 continue
693 continue
779 # filter out delta base that will never produce good delta
694 # filter out delta base that will never produce good delta
780 #
695 #
781 # if the delta of that base is already bigger than the limit
696 # if the delta of that base is already bigger than the limit
782 # for the delta chain size, doing a delta is hopeless.
697 # for the delta chain size, doing a delta is hopeless.
783 if deltas_limit < self.revlog.length(rev):
698 if deltas_limit < self.revlog.length(rev):
784 tested.add(rev)
699 tested.add(rev)
785 continue
700 continue
786 if sparse and self.revlog.rawsize(rev) < (
701 if sparse and self.revlog.rawsize(rev) < (
787 self.textlen // LIMIT_BASE2TEXT
702 self.textlen // LIMIT_BASE2TEXT
788 ):
703 ):
789 tested.add(rev)
704 tested.add(rev)
790 continue
705 continue
791 # no delta for rawtext-changing revs (see "candelta" for why)
706 # no delta for rawtext-changing revs (see "candelta" for why)
792 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
707 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
793 tested.add(rev)
708 tested.add(rev)
794 continue
709 continue
795
710
796 # If we reach here, we are about to build and test a delta.
711 # If we reach here, we are about to build and test a delta.
797 # The delta building process will compute the chaininfo in all
712 # The delta building process will compute the chaininfo in all
798 # case, since that computation is cached, it is fine to access
713 # case, since that computation is cached, it is fine to access
799 # it here too.
714 # it here too.
800 chainlen, chainsize = self.revlog._chaininfo(rev)
715 chainlen, chainsize = self.revlog._chaininfo(rev)
801 # if chain will be too long, skip base
716 # if chain will be too long, skip base
802 if (
717 if (
803 self.revlog.delta_config.max_chain_len
718 self.revlog.delta_config.max_chain_len
804 and chainlen >= self.revlog.delta_config.max_chain_len
719 and chainlen >= self.revlog.delta_config.max_chain_len
805 ):
720 ):
806 tested.add(rev)
721 tested.add(rev)
807 continue
722 continue
808 # if chain already have too much data, skip base
723 # if chain already have too much data, skip base
809 if deltas_limit < chainsize:
724 if deltas_limit < chainsize:
810 tested.add(rev)
725 tested.add(rev)
811 continue
726 continue
812 if (
727 if (
813 sparse
728 sparse
814 and self.revlog.delta_config.upper_bound_comp is not None
729 and self.revlog.delta_config.upper_bound_comp is not None
815 ):
730 ):
816 maxcomp = self.revlog.delta_config.upper_bound_comp
731 maxcomp = self.revlog.delta_config.upper_bound_comp
817 basenotsnap = (self.p1, self.p2, nullrev)
732 basenotsnap = (self.p1, self.p2, nullrev)
818 if rev not in basenotsnap and self.revlog.issnapshot(rev):
733 if rev not in basenotsnap and self.revlog.issnapshot(rev):
819 snapshotdepth = self.revlog.snapshotdepth(rev)
734 snapshotdepth = self.revlog.snapshotdepth(rev)
820 # If text is significantly larger than the base, we can
735 # If text is significantly larger than the base, we can
821 # expect the resulting delta to be proportional to the size
736 # expect the resulting delta to be proportional to the size
822 # difference
737 # difference
823 revsize = self.revlog.rawsize(rev)
738 revsize = self.revlog.rawsize(rev)
824 rawsizedistance = max(self.textlen - revsize, 0)
739 rawsizedistance = max(self.textlen - revsize, 0)
825 # use an estimate of the compression upper bound.
740 # use an estimate of the compression upper bound.
826 lowestrealisticdeltalen = rawsizedistance // maxcomp
741 lowestrealisticdeltalen = rawsizedistance // maxcomp
827
742
828 # check the absolute constraint on the delta size
743 # check the absolute constraint on the delta size
829 snapshotlimit = self.textlen >> snapshotdepth
744 snapshotlimit = self.textlen >> snapshotdepth
830 if snapshotlimit < lowestrealisticdeltalen:
745 if snapshotlimit < lowestrealisticdeltalen:
831 # delta lower bound is larger than accepted upper
746 # delta lower bound is larger than accepted upper
832 # bound
747 # bound
833 tested.add(rev)
748 tested.add(rev)
834 continue
749 continue
835
750
836 # check the relative constraint on the delta size
751 # check the relative constraint on the delta size
837 revlength = self.revlog.length(rev)
752 revlength = self.revlog.length(rev)
838 if revlength < lowestrealisticdeltalen:
753 if revlength < lowestrealisticdeltalen:
839 # delta probable lower bound is larger than target
754 # delta probable lower bound is larger than target
840 # base
755 # base
841 tested.add(rev)
756 tested.add(rev)
842 continue
757 continue
843
758
844 group.append(rev)
759 group.append(rev)
845 if group:
760 if group:
846 # When the size of the candidate group is big, it can result in
761 # When the size of the candidate group is big, it can result in
847 # a quite significant performance impact. To reduce this, we
762 # a quite significant performance impact. To reduce this, we
848 # can send them in smaller batches until the new batch does not
763 # can send them in smaller batches until the new batch does not
849 # provide any improvements.
764 # provide any improvements.
850 #
765 #
851 # This might reduce the overall efficiency of the compression
766 # This might reduce the overall efficiency of the compression
852 # in some corner cases, but that should also prevent very
767 # in some corner cases, but that should also prevent very
853 # pathological cases from being an issue. (eg. 20 000
768 # pathological cases from being an issue. (eg. 20 000
854 # candidates).
769 # candidates).
855 #
770 #
856 # XXX note that the ordering of the group becomes important as
771 # XXX note that the ordering of the group becomes important as
857 # it now impacts the final result. The current order is
772 # it now impacts the final result. The current order is
858 # unprocessed and can be improved.
773 # unprocessed and can be improved.
859 if group_chunk_size == 0:
774 if group_chunk_size == 0:
860 tested.update(group)
775 tested.update(group)
861 good = yield tuple(group)
776 good = yield tuple(group)
862 else:
777 else:
863 prev_good = good
778 prev_good = good
864 for start in range(0, len(group), group_chunk_size):
779 for start in range(0, len(group), group_chunk_size):
865 sub_group = group[start : start + group_chunk_size]
780 sub_group = group[start : start + group_chunk_size]
866 tested.update(sub_group)
781 tested.update(sub_group)
867 good = yield tuple(sub_group)
782 good = yield tuple(sub_group)
868 if prev_good == good:
783 if prev_good == good:
869 break
784 break
870
785
871 yield None
786 yield None
872
787
873 def _refined_groups(self):
788 def _refined_groups(self):
874 good = None
789 good = None
875 # First we try to reuse a the delta contained in the bundle. (or from
790 # First we try to reuse a the delta contained in the bundle. (or from
876 # the source revlog)
791 # the source revlog)
877 #
792 #
878 # This logic only applies to general delta repositories and can be
793 # This logic only applies to general delta repositories and can be
879 # disabled through configuration. Disabling reuse source delta is
794 # disabled through configuration. Disabling reuse source delta is
880 # useful when we want to make sure we recomputed "optimal" deltas.
795 # useful when we want to make sure we recomputed "optimal" deltas.
881 debug_info = None
796 debug_info = None
882 if (
797 if (
883 self.cachedelta is not None
798 self.cachedelta is not None
884 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
799 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
885 ):
800 ):
886 # Assume what we received from the server is a good choice
801 # Assume what we received from the server is a good choice
887 # build delta will reuse the cache
802 # build delta will reuse the cache
888 if debug_info is not None:
803 if debug_info is not None:
889 debug_info['cached-delta.tested'] += 1
804 debug_info['cached-delta.tested'] += 1
890 good = yield (self.cachedelta[0],)
805 good = yield (self.cachedelta[0],)
891 if good is not None:
806 if good is not None:
892 if debug_info is not None:
807 if debug_info is not None:
893 debug_info['cached-delta.accepted'] += 1
808 debug_info['cached-delta.accepted'] += 1
894 yield None
809 yield None
895 return
810 return
896 groups = self._raw_groups()
811 groups = self._raw_groups()
897 for candidates in groups:
812 for candidates in groups:
898 good = yield candidates
813 good = yield candidates
899 if good is not None:
814 if good is not None:
900 break
815 break
901
816
902 # If sparse revlog is enabled, we can try to refine the available
817 # If sparse revlog is enabled, we can try to refine the available
903 # deltas
818 # deltas
904 if not self.revlog.delta_config.sparse_revlog:
819 if not self.revlog.delta_config.sparse_revlog:
905 yield None
820 yield None
906 return
821 return
907
822
908 # if we have a refinable value, try to refine it
823 # if we have a refinable value, try to refine it
909 if (
824 if (
910 good is not None
825 good is not None
911 and good not in (self.p1, self.p2)
826 and good not in (self.p1, self.p2)
912 and self.revlog.issnapshot(good)
827 and self.revlog.issnapshot(good)
913 ):
828 ):
914 # refine snapshot down
829 # refine snapshot down
915 previous = None
830 previous = None
916 while previous != good:
831 while previous != good:
917 previous = good
832 previous = good
918 base = self.revlog.deltaparent(good)
833 base = self.revlog.deltaparent(good)
919 if base == nullrev:
834 if base == nullrev:
920 break
835 break
921 good = yield (base,)
836 good = yield (base,)
922 # refine snapshot up
837 # refine snapshot up
923 if not self.snapshot_cache.snapshots:
838 if not self.snapshot_cache.snapshots:
924 self.snapshot_cache.update(self.revlog, good + 1)
839 self.snapshot_cache.update(self.revlog, good + 1)
925 previous = None
840 previous = None
926 while good != previous:
841 while good != previous:
927 previous = good
842 previous = good
928 children = tuple(
843 children = tuple(
929 sorted(c for c in self.snapshot_cache.snapshots[good])
844 sorted(c for c in self.snapshot_cache.snapshots[good])
930 )
845 )
931 good = yield children
846 good = yield children
932
847
933 if debug_info is not None:
848 if debug_info is not None:
934 if good is None:
849 if good is None:
935 debug_info['no-solution'] += 1
850 debug_info['no-solution'] += 1
936
851
937 yield None
852 yield None
938
853
939 def _raw_groups(self):
854 def _raw_groups(self):
940 """Provides group of revision to be tested as delta base
855 """Provides group of revision to be tested as delta base
941
856
942 This lower level function focus on emitting delta theorically
857 This lower level function focus on emitting delta theorically
943 interresting without looking it any practical details.
858 interresting without looking it any practical details.
944
859
945 The group order aims at providing fast or small candidates first.
860 The group order aims at providing fast or small candidates first.
946 """
861 """
947 # Why search for delta base if we cannot use a delta base ?
862 # Why search for delta base if we cannot use a delta base ?
948 assert self.revlog.delta_config.general_delta
863 assert self.revlog.delta_config.general_delta
949 # also see issue6056
864 # also see issue6056
950 sparse = self.revlog.delta_config.sparse_revlog
865 sparse = self.revlog.delta_config.sparse_revlog
951 curr = len(self.revlog)
866 curr = len(self.revlog)
952 prev = curr - 1
867 prev = curr - 1
953 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
868 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
954
869
955 # exclude already lazy tested base if any
870 # exclude already lazy tested base if any
956 parents = [p for p in (self.p1, self.p2) if p != nullrev]
871 parents = [p for p in (self.p1, self.p2) if p != nullrev]
957
872
958 if (
873 if (
959 not self.revlog.delta_config.delta_both_parents
874 not self.revlog.delta_config.delta_both_parents
960 and len(parents) == 2
875 and len(parents) == 2
961 ):
876 ):
962 parents.sort()
877 parents.sort()
963 # To minimize the chance of having to build a fulltext,
878 # To minimize the chance of having to build a fulltext,
964 # pick first whichever parent is closest to us (max rev)
879 # pick first whichever parent is closest to us (max rev)
965 yield (parents[1],)
880 yield (parents[1],)
966 # then the other one (min rev) if the first did not fit
881 # then the other one (min rev) if the first did not fit
967 yield (parents[0],)
882 yield (parents[0],)
968 elif len(parents) > 0:
883 elif len(parents) > 0:
969 # Test all parents (1 or 2), and keep the best candidate
884 # Test all parents (1 or 2), and keep the best candidate
970 yield parents
885 yield parents
971
886
972 if sparse and parents:
887 if sparse and parents:
973 # See if we can use an existing snapshot in the parent chains to
888 # See if we can use an existing snapshot in the parent chains to
974 # use as a base for a new intermediate-snapshot
889 # use as a base for a new intermediate-snapshot
975 #
890 #
976 # search for snapshot in parents delta chain map: snapshot-level:
891 # search for snapshot in parents delta chain map: snapshot-level:
977 # snapshot-rev
892 # snapshot-rev
978 parents_snaps = collections.defaultdict(set)
893 parents_snaps = collections.defaultdict(set)
979 candidate_chains = [deltachain(p) for p in parents]
894 candidate_chains = [deltachain(p) for p in parents]
980 for chain in candidate_chains:
895 for chain in candidate_chains:
981 for idx, s in enumerate(chain):
896 for idx, s in enumerate(chain):
982 if not self.revlog.issnapshot(s):
897 if not self.revlog.issnapshot(s):
983 break
898 break
984 parents_snaps[idx].add(s)
899 parents_snaps[idx].add(s)
985 snapfloor = min(parents_snaps[0]) + 1
900 snapfloor = min(parents_snaps[0]) + 1
986 self.snapshot_cache.update(self.revlog, snapfloor)
901 self.snapshot_cache.update(self.revlog, snapfloor)
987 # search for the highest "unrelated" revision
902 # search for the highest "unrelated" revision
988 #
903 #
989 # Adding snapshots used by "unrelated" revision increase the odd we
904 # Adding snapshots used by "unrelated" revision increase the odd we
990 # reuse an independant, yet better snapshot chain.
905 # reuse an independant, yet better snapshot chain.
991 #
906 #
992 # XXX instead of building a set of revisions, we could lazily
907 # XXX instead of building a set of revisions, we could lazily
993 # enumerate over the chains. That would be more efficient, however
908 # enumerate over the chains. That would be more efficient, however
994 # we stick to simple code for now.
909 # we stick to simple code for now.
995 all_revs = set()
910 all_revs = set()
996 for chain in candidate_chains:
911 for chain in candidate_chains:
997 all_revs.update(chain)
912 all_revs.update(chain)
998 other = None
913 other = None
999 for r in self.revlog.revs(prev, snapfloor):
914 for r in self.revlog.revs(prev, snapfloor):
1000 if r not in all_revs:
915 if r not in all_revs:
1001 other = r
916 other = r
1002 break
917 break
1003 if other is not None:
918 if other is not None:
1004 # To avoid unfair competition, we won't use unrelated
919 # To avoid unfair competition, we won't use unrelated
1005 # intermediate snapshot that are deeper than the ones from the
920 # intermediate snapshot that are deeper than the ones from the
1006 # parent delta chain.
921 # parent delta chain.
1007 max_depth = max(parents_snaps.keys())
922 max_depth = max(parents_snaps.keys())
1008 chain = deltachain(other)
923 chain = deltachain(other)
1009 for depth, s in enumerate(chain):
924 for depth, s in enumerate(chain):
1010 if s < snapfloor:
925 if s < snapfloor:
1011 continue
926 continue
1012 if max_depth < depth:
927 if max_depth < depth:
1013 break
928 break
1014 if not self.revlog.issnapshot(s):
929 if not self.revlog.issnapshot(s):
1015 break
930 break
1016 parents_snaps[depth].add(s)
931 parents_snaps[depth].add(s)
1017 # Test them as possible intermediate snapshot base We test them
932 # Test them as possible intermediate snapshot base We test them
1018 # from highest to lowest level. High level one are more likely to
933 # from highest to lowest level. High level one are more likely to
1019 # result in small delta
934 # result in small delta
1020 floor = None
935 floor = None
1021 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
936 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1022 siblings = set()
937 siblings = set()
1023 for s in snaps:
938 for s in snaps:
1024 siblings.update(self.snapshot_cache.snapshots[s])
939 siblings.update(self.snapshot_cache.snapshots[s])
1025 # Before considering making a new intermediate snapshot, we
940 # Before considering making a new intermediate snapshot, we
1026 # check if an existing snapshot, children of base we consider,
941 # check if an existing snapshot, children of base we consider,
1027 # would be suitable.
942 # would be suitable.
1028 #
943 #
1029 # It give a change to reuse a delta chain "unrelated" to the
944 # It give a change to reuse a delta chain "unrelated" to the
1030 # current revision instead of starting our own. Without such
945 # current revision instead of starting our own. Without such
1031 # re-use, topological branches would keep reopening new chains.
946 # re-use, topological branches would keep reopening new chains.
1032 # Creating more and more snapshot as the repository grow.
947 # Creating more and more snapshot as the repository grow.
1033
948
1034 if floor is not None:
949 if floor is not None:
1035 # We only do this for siblings created after the one in our
950 # We only do this for siblings created after the one in our
1036 # parent's delta chain. Those created before has less
951 # parent's delta chain. Those created before has less
1037 # chances to be valid base since our ancestors had to
952 # chances to be valid base since our ancestors had to
1038 # create a new snapshot.
953 # create a new snapshot.
1039 siblings = [r for r in siblings if floor < r]
954 siblings = [r for r in siblings if floor < r]
1040 yield tuple(sorted(siblings))
955 yield tuple(sorted(siblings))
1041 # then test the base from our parent's delta chain.
956 # then test the base from our parent's delta chain.
1042 yield tuple(sorted(snaps))
957 yield tuple(sorted(snaps))
1043 floor = min(snaps)
958 floor = min(snaps)
1044 # No suitable base found in the parent chain, search if any full
959 # No suitable base found in the parent chain, search if any full
1045 # snapshots emitted since parent's base would be a suitable base
960 # snapshots emitted since parent's base would be a suitable base
1046 # for an intermediate snapshot.
961 # for an intermediate snapshot.
1047 #
962 #
1048 # It give a chance to reuse a delta chain unrelated to the current
963 # It give a chance to reuse a delta chain unrelated to the current
1049 # revisions instead of starting our own. Without such re-use,
964 # revisions instead of starting our own. Without such re-use,
1050 # topological branches would keep reopening new full chains.
965 # topological branches would keep reopening new full chains.
1051 # Creating more and more snapshot as the repository grow.
966 # Creating more and more snapshot as the repository grow.
1052 full = [
967 full = [
1053 r
968 r
1054 for r in self.snapshot_cache.snapshots[nullrev]
969 for r in self.snapshot_cache.snapshots[nullrev]
1055 if snapfloor <= r
970 if snapfloor <= r
1056 ]
971 ]
1057 yield tuple(sorted(full))
972 yield tuple(sorted(full))
1058
973
1059 if not sparse:
974 if not sparse:
1060 # other approach failed try against prev to hopefully save us a
975 # other approach failed try against prev to hopefully save us a
1061 # fulltext.
976 # fulltext.
1062 yield (prev,)
977 yield (prev,)
1063
978
979 def is_good_delta_info(self, deltainfo):
980 """Returns True if the given delta is good. Good means that it is
981 within the disk span, disk size, and chain length bounds that we know
982 to be performant."""
983 if deltainfo is None:
984 return False
985
986 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
987 # so we should never end up asking such question. Adding the assert as
988 # a safe-guard to detect anything that would be fishy in this regard.
989 assert (
990 self.revinfo.cachedelta is None
991 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
992 or not self.revlog.delta_config.general_delta
993 )
994
995 # - 'deltainfo.distance' is the distance from the base revision --
996 # bounding it limits the amount of I/O we need to do.
997 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
998 # deltas we need to apply -- bounding it limits the amount of CPU
999 # we consume.
1000
1001 textlen = self.revinfo.textlen
1002 defaultmax = textlen * 4
1003 maxdist = self.revlog.delta_config.max_deltachain_span
1004 if not maxdist:
1005 maxdist = deltainfo.distance # ensure the conditional pass
1006 maxdist = max(maxdist, defaultmax)
1007
1008 # Bad delta from read span:
1009 #
1010 # If the span of data read is larger than the maximum allowed.
1011 #
1012 # In the sparse-revlog case, we rely on the associated "sparse
1013 # reading" to avoid issue related to the span of data. In theory, it
1014 # would be possible to build pathological revlog where delta pattern
1015 # would lead to too many reads. However, they do not happen in
1016 # practice at all. So we skip the span check entirely.
1017 if (
1018 not self.revlog.delta_config.sparse_revlog
1019 and maxdist < deltainfo.distance
1020 ):
1021 return False
1022
1023 # Bad delta from new delta size:
1024 #
1025 # If the delta size is larger than the target text, storing the delta
1026 # will be inefficient.
1027 if textlen < deltainfo.deltalen:
1028 return False
1029
1030 # Bad delta from cumulated payload size:
1031 #
1032 # If the sum of delta get larger than K * target text length.
1033 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1034 return False
1035
1036 # Bad delta from chain length:
1037 #
1038 # If the number of delta in the chain gets too high.
1039 if (
1040 self.revlog.delta_config.max_chain_len
1041 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1042 ):
1043 return False
1044
1045 # bad delta from intermediate snapshot size limit
1046 #
1047 # If an intermediate snapshot size is higher than the limit. The
1048 # limit exist to prevent endless chain of intermediate delta to be
1049 # created.
1050 if (
1051 deltainfo.snapshotdepth is not None
1052 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1053 ):
1054 return False
1055
1056 # bad delta if new intermediate snapshot is larger than the previous
1057 # snapshot
1058 if (
1059 deltainfo.snapshotdepth
1060 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1061 ):
1062 return False
1063
1064 return True
1065
1064
1066
1065 class SnapshotCache:
1067 class SnapshotCache:
1066 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1068 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1067
1069
1068 def __init__(self):
1070 def __init__(self):
1069 self.snapshots = collections.defaultdict(set)
1071 self.snapshots = collections.defaultdict(set)
1070 self._start_rev = None
1072 self._start_rev = None
1071 self._end_rev = None
1073 self._end_rev = None
1072
1074
1073 def update(self, revlog, start_rev=0):
1075 def update(self, revlog, start_rev=0):
1074 """find snapshots from start_rev to tip"""
1076 """find snapshots from start_rev to tip"""
1075 nb_revs = len(revlog)
1077 nb_revs = len(revlog)
1076 end_rev = nb_revs - 1
1078 end_rev = nb_revs - 1
1077 if start_rev > end_rev:
1079 if start_rev > end_rev:
1078 return # range is empty
1080 return # range is empty
1079
1081
1080 if self._start_rev is None:
1082 if self._start_rev is None:
1081 assert self._end_rev is None
1083 assert self._end_rev is None
1082 self._update(revlog, start_rev, end_rev)
1084 self._update(revlog, start_rev, end_rev)
1083 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1085 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1084 if start_rev < self._start_rev:
1086 if start_rev < self._start_rev:
1085 self._update(revlog, start_rev, self._start_rev - 1)
1087 self._update(revlog, start_rev, self._start_rev - 1)
1086 if self._end_rev < end_rev:
1088 if self._end_rev < end_rev:
1087 self._update(revlog, self._end_rev + 1, end_rev)
1089 self._update(revlog, self._end_rev + 1, end_rev)
1088
1090
1089 if self._start_rev is None:
1091 if self._start_rev is None:
1090 assert self._end_rev is None
1092 assert self._end_rev is None
1091 self._end_rev = end_rev
1093 self._end_rev = end_rev
1092 self._start_rev = start_rev
1094 self._start_rev = start_rev
1093 else:
1095 else:
1094 self._start_rev = min(self._start_rev, start_rev)
1096 self._start_rev = min(self._start_rev, start_rev)
1095 self._end_rev = max(self._end_rev, end_rev)
1097 self._end_rev = max(self._end_rev, end_rev)
1096 assert self._start_rev <= self._end_rev, (
1098 assert self._start_rev <= self._end_rev, (
1097 self._start_rev,
1099 self._start_rev,
1098 self._end_rev,
1100 self._end_rev,
1099 )
1101 )
1100
1102
1101 def _update(self, revlog, start_rev, end_rev):
1103 def _update(self, revlog, start_rev, end_rev):
1102 """internal method that actually do update content"""
1104 """internal method that actually do update content"""
1103 assert self._start_rev is None or (
1105 assert self._start_rev is None or (
1104 start_rev < self._start_rev or start_rev > self._end_rev
1106 start_rev < self._start_rev or start_rev > self._end_rev
1105 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1107 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1106 assert self._start_rev is None or (
1108 assert self._start_rev is None or (
1107 end_rev < self._start_rev or end_rev > self._end_rev
1109 end_rev < self._start_rev or end_rev > self._end_rev
1108 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1110 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1109 cache = self.snapshots
1111 cache = self.snapshots
1110 if hasattr(revlog.index, 'findsnapshots'):
1112 if hasattr(revlog.index, 'findsnapshots'):
1111 revlog.index.findsnapshots(cache, start_rev, end_rev)
1113 revlog.index.findsnapshots(cache, start_rev, end_rev)
1112 else:
1114 else:
1113 deltaparent = revlog.deltaparent
1115 deltaparent = revlog.deltaparent
1114 issnapshot = revlog.issnapshot
1116 issnapshot = revlog.issnapshot
1115 for rev in revlog.revs(start_rev, end_rev):
1117 for rev in revlog.revs(start_rev, end_rev):
1116 if issnapshot(rev):
1118 if issnapshot(rev):
1117 cache[deltaparent(rev)].add(rev)
1119 cache[deltaparent(rev)].add(rev)
1118
1120
1119
1121
1120 class deltacomputer:
1122 class deltacomputer:
1121 """object capable of computing delta and finding delta for multiple revision
1123 """object capable of computing delta and finding delta for multiple revision
1122
1124
1123 This object is meant to compute and find multiple delta applied to the same
1125 This object is meant to compute and find multiple delta applied to the same
1124 revlog.
1126 revlog.
1125 """
1127 """
1126
1128
1127 def __init__(
1129 def __init__(
1128 self,
1130 self,
1129 revlog,
1131 revlog,
1130 write_debug=None,
1132 write_debug=None,
1131 debug_search=False,
1133 debug_search=False,
1132 debug_info=None,
1134 debug_info=None,
1133 ):
1135 ):
1134 self.revlog = revlog
1136 self.revlog = revlog
1135 self._write_debug = write_debug
1137 self._write_debug = write_debug
1136 if write_debug is None:
1138 if write_debug is None:
1137 self._debug_search = False
1139 self._debug_search = False
1138 else:
1140 else:
1139 self._debug_search = debug_search
1141 self._debug_search = debug_search
1140 self._debug_info = debug_info
1142 self._debug_info = debug_info
1141 self._snapshot_cache = SnapshotCache()
1143 self._snapshot_cache = SnapshotCache()
1142
1144
1143 @property
1145 @property
1144 def _gather_debug(self):
1146 def _gather_debug(self):
1145 return self._write_debug is not None or self._debug_info is not None
1147 return self._write_debug is not None or self._debug_info is not None
1146
1148
1147 def buildtext(self, revinfo):
1149 def buildtext(self, revinfo):
1148 """Builds a fulltext version of a revision
1150 """Builds a fulltext version of a revision
1149
1151
1150 revinfo: revisioninfo instance that contains all needed info
1152 revinfo: revisioninfo instance that contains all needed info
1151 """
1153 """
1152 btext = revinfo.btext
1154 btext = revinfo.btext
1153 if btext[0] is not None:
1155 if btext[0] is not None:
1154 return btext[0]
1156 return btext[0]
1155
1157
1156 revlog = self.revlog
1158 revlog = self.revlog
1157 cachedelta = revinfo.cachedelta
1159 cachedelta = revinfo.cachedelta
1158 baserev = cachedelta[0]
1160 baserev = cachedelta[0]
1159 delta = cachedelta[1]
1161 delta = cachedelta[1]
1160
1162
1161 fulltext = btext[0] = _textfromdelta(
1163 fulltext = btext[0] = _textfromdelta(
1162 revlog,
1164 revlog,
1163 baserev,
1165 baserev,
1164 delta,
1166 delta,
1165 revinfo.p1,
1167 revinfo.p1,
1166 revinfo.p2,
1168 revinfo.p2,
1167 revinfo.flags,
1169 revinfo.flags,
1168 revinfo.node,
1170 revinfo.node,
1169 )
1171 )
1170 return fulltext
1172 return fulltext
1171
1173
1172 def _builddeltadiff(self, base, revinfo):
1174 def _builddeltadiff(self, base, revinfo):
1173 revlog = self.revlog
1175 revlog = self.revlog
1174 t = self.buildtext(revinfo)
1176 t = self.buildtext(revinfo)
1175 if revlog.iscensored(base):
1177 if revlog.iscensored(base):
1176 # deltas based on a censored revision must replace the
1178 # deltas based on a censored revision must replace the
1177 # full content in one patch, so delta works everywhere
1179 # full content in one patch, so delta works everywhere
1178 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1180 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1179 delta = header + t
1181 delta = header + t
1180 else:
1182 else:
1181 ptext = revlog.rawdata(base)
1183 ptext = revlog.rawdata(base)
1182 delta = mdiff.textdiff(ptext, t)
1184 delta = mdiff.textdiff(ptext, t)
1183
1185
1184 return delta
1186 return delta
1185
1187
1186 def _builddeltainfo(self, revinfo, base, target_rev=None):
1188 def _builddeltainfo(self, revinfo, base, target_rev=None):
1187 # can we use the cached delta?
1189 # can we use the cached delta?
1188 revlog = self.revlog
1190 revlog = self.revlog
1189 chainbase = revlog.chainbase(base)
1191 chainbase = revlog.chainbase(base)
1190 if revlog.delta_config.general_delta:
1192 if revlog.delta_config.general_delta:
1191 deltabase = base
1193 deltabase = base
1192 else:
1194 else:
1193 if target_rev is not None and base != target_rev - 1:
1195 if target_rev is not None and base != target_rev - 1:
1194 msg = (
1196 msg = (
1195 b'general delta cannot use delta for something else '
1197 b'general delta cannot use delta for something else '
1196 b'than `prev`: %d<-%d'
1198 b'than `prev`: %d<-%d'
1197 )
1199 )
1198 msg %= (base, target_rev)
1200 msg %= (base, target_rev)
1199 raise error.ProgrammingError(msg)
1201 raise error.ProgrammingError(msg)
1200 deltabase = chainbase
1202 deltabase = chainbase
1201 snapshotdepth = None
1203 snapshotdepth = None
1202 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1204 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1203 snapshotdepth = 0
1205 snapshotdepth = 0
1204 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1206 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1205 # A delta chain should always be one full snapshot,
1207 # A delta chain should always be one full snapshot,
1206 # zero or more semi-snapshots, and zero or more deltas
1208 # zero or more semi-snapshots, and zero or more deltas
1207 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1209 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1208 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1210 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1209 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1211 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1210 delta = None
1212 delta = None
1211 if revinfo.cachedelta:
1213 if revinfo.cachedelta:
1212 cachebase = revinfo.cachedelta[0]
1214 cachebase = revinfo.cachedelta[0]
1213 # check if the diff still apply
1215 # check if the diff still apply
1214 currentbase = cachebase
1216 currentbase = cachebase
1215 while (
1217 while (
1216 currentbase != nullrev
1218 currentbase != nullrev
1217 and currentbase != base
1219 and currentbase != base
1218 and self.revlog.length(currentbase) == 0
1220 and self.revlog.length(currentbase) == 0
1219 ):
1221 ):
1220 currentbase = self.revlog.deltaparent(currentbase)
1222 currentbase = self.revlog.deltaparent(currentbase)
1221 if self.revlog.delta_config.lazy_delta and currentbase == base:
1223 if self.revlog.delta_config.lazy_delta and currentbase == base:
1222 delta = revinfo.cachedelta[1]
1224 delta = revinfo.cachedelta[1]
1223 if delta is None:
1225 if delta is None:
1224 delta = self._builddeltadiff(base, revinfo)
1226 delta = self._builddeltadiff(base, revinfo)
1225 if self._debug_search:
1227 if self._debug_search:
1226 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1228 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1227 msg %= len(delta)
1229 msg %= len(delta)
1228 self._write_debug(msg)
1230 self._write_debug(msg)
1229 # snapshotdept need to be neither None nor 0 level snapshot
1231 # snapshotdept need to be neither None nor 0 level snapshot
1230 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1232 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1231 lowestrealisticdeltalen = (
1233 lowestrealisticdeltalen = (
1232 len(delta) // revlog.delta_config.upper_bound_comp
1234 len(delta) // revlog.delta_config.upper_bound_comp
1233 )
1235 )
1234 snapshotlimit = revinfo.textlen >> snapshotdepth
1236 snapshotlimit = revinfo.textlen >> snapshotdepth
1235 if self._debug_search:
1237 if self._debug_search:
1236 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1238 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1237 msg %= lowestrealisticdeltalen
1239 msg %= lowestrealisticdeltalen
1238 self._write_debug(msg)
1240 self._write_debug(msg)
1239 if snapshotlimit < lowestrealisticdeltalen:
1241 if snapshotlimit < lowestrealisticdeltalen:
1240 if self._debug_search:
1242 if self._debug_search:
1241 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1243 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1242 self._write_debug(msg)
1244 self._write_debug(msg)
1243 return None
1245 return None
1244 if revlog.length(base) < lowestrealisticdeltalen:
1246 if revlog.length(base) < lowestrealisticdeltalen:
1245 if self._debug_search:
1247 if self._debug_search:
1246 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1248 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1247 self._write_debug(msg)
1249 self._write_debug(msg)
1248 return None
1250 return None
1249 header, data = revlog._inner.compress(delta)
1251 header, data = revlog._inner.compress(delta)
1250 deltalen = len(header) + len(data)
1252 deltalen = len(header) + len(data)
1251 offset = revlog.end(len(revlog) - 1)
1253 offset = revlog.end(len(revlog) - 1)
1252 dist = deltalen + offset - revlog.start(chainbase)
1254 dist = deltalen + offset - revlog.start(chainbase)
1253 chainlen, compresseddeltalen = revlog._chaininfo(base)
1255 chainlen, compresseddeltalen = revlog._chaininfo(base)
1254 chainlen += 1
1256 chainlen += 1
1255 compresseddeltalen += deltalen
1257 compresseddeltalen += deltalen
1256
1258
1257 return _deltainfo(
1259 return _deltainfo(
1258 dist,
1260 dist,
1259 deltalen,
1261 deltalen,
1260 (header, data),
1262 (header, data),
1261 deltabase,
1263 deltabase,
1262 chainbase,
1264 chainbase,
1263 chainlen,
1265 chainlen,
1264 compresseddeltalen,
1266 compresseddeltalen,
1265 snapshotdepth,
1267 snapshotdepth,
1266 )
1268 )
1267
1269
1268 def _fullsnapshotinfo(self, revinfo, curr):
1270 def _fullsnapshotinfo(self, revinfo, curr):
1269 rawtext = self.buildtext(revinfo)
1271 rawtext = self.buildtext(revinfo)
1270 data = self.revlog._inner.compress(rawtext)
1272 data = self.revlog._inner.compress(rawtext)
1271 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1273 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1272 deltabase = chainbase = curr
1274 deltabase = chainbase = curr
1273 snapshotdepth = 0
1275 snapshotdepth = 0
1274 chainlen = 1
1276 chainlen = 1
1275
1277
1276 return _deltainfo(
1278 return _deltainfo(
1277 dist,
1279 dist,
1278 deltalen,
1280 deltalen,
1279 data,
1281 data,
1280 deltabase,
1282 deltabase,
1281 chainbase,
1283 chainbase,
1282 chainlen,
1284 chainlen,
1283 compresseddeltalen,
1285 compresseddeltalen,
1284 snapshotdepth,
1286 snapshotdepth,
1285 )
1287 )
1286
1288
1287 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1289 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1288 """Find an acceptable delta against a candidate revision
1290 """Find an acceptable delta against a candidate revision
1289
1291
1290 revinfo: information about the revision (instance of _revisioninfo)
1292 revinfo: information about the revision (instance of _revisioninfo)
1291
1293
1292 Returns the first acceptable candidate revision, as ordered by
1294 Returns the first acceptable candidate revision, as ordered by
1293 _candidategroups
1295 _candidategroups
1294
1296
1295 If no suitable deltabase is found, we return delta info for a full
1297 If no suitable deltabase is found, we return delta info for a full
1296 snapshot.
1298 snapshot.
1297
1299
1298 `excluded_bases` is an optional set of revision that cannot be used as
1300 `excluded_bases` is an optional set of revision that cannot be used as
1299 a delta base. Use this to recompute delta suitable in censor or strip
1301 a delta base. Use this to recompute delta suitable in censor or strip
1300 context.
1302 context.
1301 """
1303 """
1302 if target_rev is None:
1304 if target_rev is None:
1303 target_rev = len(self.revlog)
1305 target_rev = len(self.revlog)
1304
1306
1305 gather_debug = self._gather_debug
1307 gather_debug = self._gather_debug
1306 cachedelta = revinfo.cachedelta
1308 cachedelta = revinfo.cachedelta
1307 revlog = self.revlog
1309 revlog = self.revlog
1308 p1r = p2r = None
1310 p1r = p2r = None
1309
1311
1310 if excluded_bases is None:
1312 if excluded_bases is None:
1311 excluded_bases = set()
1313 excluded_bases = set()
1312
1314
1313 if gather_debug:
1315 if gather_debug:
1314 start = util.timer()
1316 start = util.timer()
1315 dbg = self._one_dbg_data()
1317 dbg = self._one_dbg_data()
1316 dbg['revision'] = target_rev
1318 dbg['revision'] = target_rev
1317 p1r = revlog.rev(revinfo.p1)
1319 p1r = revlog.rev(revinfo.p1)
1318 p2r = revlog.rev(revinfo.p2)
1320 p2r = revlog.rev(revinfo.p2)
1319 if p1r != nullrev:
1321 if p1r != nullrev:
1320 p1_chain_len = revlog._chaininfo(p1r)[0]
1322 p1_chain_len = revlog._chaininfo(p1r)[0]
1321 else:
1323 else:
1322 p1_chain_len = -1
1324 p1_chain_len = -1
1323 if p2r != nullrev:
1325 if p2r != nullrev:
1324 p2_chain_len = revlog._chaininfo(p2r)[0]
1326 p2_chain_len = revlog._chaininfo(p2r)[0]
1325 else:
1327 else:
1326 p2_chain_len = -1
1328 p2_chain_len = -1
1327 dbg['p1-chain-len'] = p1_chain_len
1329 dbg['p1-chain-len'] = p1_chain_len
1328 dbg['p2-chain-len'] = p2_chain_len
1330 dbg['p2-chain-len'] = p2_chain_len
1329
1331
1330 # 1) if the revision is empty, no amount of delta can beat it
1332 # 1) if the revision is empty, no amount of delta can beat it
1331 #
1333 #
1332 # 2) no delta for flag processor revision (see "candelta" for why)
1334 # 2) no delta for flag processor revision (see "candelta" for why)
1333 # not calling candelta since only one revision needs test, also to
1335 # not calling candelta since only one revision needs test, also to
1334 # avoid overhead fetching flags again.
1336 # avoid overhead fetching flags again.
1335 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1337 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1336 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1338 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1337 if gather_debug:
1339 if gather_debug:
1338 end = util.timer()
1340 end = util.timer()
1339 dbg['duration'] = end - start
1341 dbg['duration'] = end - start
1340 dbg[
1342 dbg[
1341 'delta-base'
1343 'delta-base'
1342 ] = deltainfo.base # pytype: disable=attribute-error
1344 ] = deltainfo.base # pytype: disable=attribute-error
1343 dbg['search_round_count'] = 0
1345 dbg['search_round_count'] = 0
1344 dbg['using-cached-base'] = False
1346 dbg['using-cached-base'] = False
1345 dbg['delta_try_count'] = 0
1347 dbg['delta_try_count'] = 0
1346 dbg['type'] = b"full"
1348 dbg['type'] = b"full"
1347 dbg['snapshot-depth'] = 0
1349 dbg['snapshot-depth'] = 0
1348 self._dbg_process_data(dbg)
1350 self._dbg_process_data(dbg)
1349 return deltainfo
1351 return deltainfo
1350
1352
1351 deltainfo = None
1353 deltainfo = None
1352
1354
1353 # If this source delta are to be forcibly reuse, let us comply early.
1355 # If this source delta are to be forcibly reuse, let us comply early.
1354 if (
1356 if (
1355 revlog.delta_config.general_delta
1357 revlog.delta_config.general_delta
1356 and revinfo.cachedelta is not None
1358 and revinfo.cachedelta is not None
1357 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1359 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1358 ):
1360 ):
1359 base = revinfo.cachedelta[0]
1361 base = revinfo.cachedelta[0]
1360 if base == nullrev:
1362 if base == nullrev:
1361 dbg_type = b"full"
1363 dbg_type = b"full"
1362 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1364 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1363 if gather_debug:
1365 if gather_debug:
1364 snapshotdepth = 0
1366 snapshotdepth = 0
1365 elif base not in excluded_bases:
1367 elif base not in excluded_bases:
1366 delta = revinfo.cachedelta[1]
1368 delta = revinfo.cachedelta[1]
1367 header, data = revlog.compress(delta)
1369 header, data = revlog.compress(delta)
1368 deltalen = len(header) + len(data)
1370 deltalen = len(header) + len(data)
1369 if gather_debug:
1371 if gather_debug:
1370 offset = revlog.end(len(revlog) - 1)
1372 offset = revlog.end(len(revlog) - 1)
1371 chainbase = revlog.chainbase(base)
1373 chainbase = revlog.chainbase(base)
1372 distance = deltalen + offset - revlog.start(chainbase)
1374 distance = deltalen + offset - revlog.start(chainbase)
1373 chainlen, compresseddeltalen = revlog._chaininfo(base)
1375 chainlen, compresseddeltalen = revlog._chaininfo(base)
1374 chainlen += 1
1376 chainlen += 1
1375 compresseddeltalen += deltalen
1377 compresseddeltalen += deltalen
1376 if base == p1r or base == p2r:
1378 if base == p1r or base == p2r:
1377 dbg_type = b"delta"
1379 dbg_type = b"delta"
1378 snapshotdepth = None
1380 snapshotdepth = None
1379 elif not revlog.issnapshot(base):
1381 elif not revlog.issnapshot(base):
1380 snapshotdepth = None
1382 snapshotdepth = None
1381 else:
1383 else:
1382 dbg_type = b"snapshot"
1384 dbg_type = b"snapshot"
1383 snapshotdepth = revlog.snapshotdepth(base) + 1
1385 snapshotdepth = revlog.snapshotdepth(base) + 1
1384 else:
1386 else:
1385 distance = None
1387 distance = None
1386 chainbase = None
1388 chainbase = None
1387 chainlen = None
1389 chainlen = None
1388 compresseddeltalen = None
1390 compresseddeltalen = None
1389 snapshotdepth = None
1391 snapshotdepth = None
1390 deltainfo = _deltainfo(
1392 deltainfo = _deltainfo(
1391 distance=distance,
1393 distance=distance,
1392 deltalen=deltalen,
1394 deltalen=deltalen,
1393 data=(header, data),
1395 data=(header, data),
1394 base=base,
1396 base=base,
1395 chainbase=chainbase,
1397 chainbase=chainbase,
1396 chainlen=chainlen,
1398 chainlen=chainlen,
1397 compresseddeltalen=compresseddeltalen,
1399 compresseddeltalen=compresseddeltalen,
1398 snapshotdepth=snapshotdepth,
1400 snapshotdepth=snapshotdepth,
1399 )
1401 )
1400
1402
1401 if deltainfo is not None:
1403 if deltainfo is not None:
1402 if gather_debug:
1404 if gather_debug:
1403 end = util.timer()
1405 end = util.timer()
1404 dbg['duration'] = end - start
1406 dbg['duration'] = end - start
1405 dbg[
1407 dbg[
1406 'delta-base'
1408 'delta-base'
1407 ] = deltainfo.base # pytype: disable=attribute-error
1409 ] = deltainfo.base # pytype: disable=attribute-error
1408 dbg['search_round_count'] = 0
1410 dbg['search_round_count'] = 0
1409 dbg['using-cached-base'] = True
1411 dbg['using-cached-base'] = True
1410 dbg['delta_try_count'] = 0
1412 dbg['delta_try_count'] = 0
1411 dbg['type'] = b"full"
1413 dbg['type'] = b"full"
1412 if snapshotdepth is None:
1414 if snapshotdepth is None:
1413 dbg['snapshot-depth'] = 0
1415 dbg['snapshot-depth'] = 0
1414 else:
1416 else:
1415 dbg['snapshot-depth'] = snapshotdepth
1417 dbg['snapshot-depth'] = snapshotdepth
1416 self._dbg_process_data(dbg)
1418 self._dbg_process_data(dbg)
1417 return deltainfo
1419 return deltainfo
1418
1420
1419 # count the number of different delta we tried (for debug purpose)
1421 # count the number of different delta we tried (for debug purpose)
1420 dbg_try_count = 0
1422 dbg_try_count = 0
1421 # count the number of "search round" we did. (for debug purpose)
1423 # count the number of "search round" we did. (for debug purpose)
1422 dbg_try_rounds = 0
1424 dbg_try_rounds = 0
1423 dbg_type = b'unknown'
1425 dbg_type = b'unknown'
1424
1426
1425 if p1r is None:
1427 if p1r is None:
1426 p1r = revlog.rev(revinfo.p1)
1428 p1r = revlog.rev(revinfo.p1)
1427 p2r = revlog.rev(revinfo.p2)
1429 p2r = revlog.rev(revinfo.p2)
1428
1430
1429 if self._debug_search:
1431 if self._debug_search:
1430 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1432 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1431 msg %= target_rev
1433 msg %= target_rev
1432 self._write_debug(msg)
1434 self._write_debug(msg)
1433
1435
1434 search = _DeltaSearch(
1436 search = _DeltaSearch(
1435 self.revlog,
1437 self.revlog,
1436 revinfo,
1438 revinfo,
1437 p1r,
1439 p1r,
1438 p2r,
1440 p2r,
1439 cachedelta,
1441 cachedelta,
1440 excluded_bases,
1442 excluded_bases,
1441 target_rev,
1443 target_rev,
1442 snapshot_cache=self._snapshot_cache,
1444 snapshot_cache=self._snapshot_cache,
1443 )
1445 )
1444
1446
1445 groups = search.candidate_groups()
1447 groups = search.candidate_groups()
1446 candidaterevs = next(groups)
1448 candidaterevs = next(groups)
1447 while candidaterevs is not None:
1449 while candidaterevs is not None:
1448 dbg_try_rounds += 1
1450 dbg_try_rounds += 1
1449 if self._debug_search:
1451 if self._debug_search:
1450 prev = None
1452 prev = None
1451 if deltainfo is not None:
1453 if deltainfo is not None:
1452 prev = deltainfo.base
1454 prev = deltainfo.base
1453
1455
1454 if (
1456 if (
1455 cachedelta is not None
1457 cachedelta is not None
1456 and len(candidaterevs) == 1
1458 and len(candidaterevs) == 1
1457 and cachedelta[0] in candidaterevs
1459 and cachedelta[0] in candidaterevs
1458 ):
1460 ):
1459 round_type = b"cached-delta"
1461 round_type = b"cached-delta"
1460 elif p1r in candidaterevs or p2r in candidaterevs:
1462 elif p1r in candidaterevs or p2r in candidaterevs:
1461 round_type = b"parents"
1463 round_type = b"parents"
1462 elif prev is not None and all(c < prev for c in candidaterevs):
1464 elif prev is not None and all(c < prev for c in candidaterevs):
1463 round_type = b"refine-down"
1465 round_type = b"refine-down"
1464 elif prev is not None and all(c > prev for c in candidaterevs):
1466 elif prev is not None and all(c > prev for c in candidaterevs):
1465 round_type = b"refine-up"
1467 round_type = b"refine-up"
1466 else:
1468 else:
1467 round_type = b"search-down"
1469 round_type = b"search-down"
1468 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1470 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1469 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1471 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1470 self._write_debug(msg)
1472 self._write_debug(msg)
1471 nominateddeltas = []
1473 nominateddeltas = []
1472 if deltainfo is not None:
1474 if deltainfo is not None:
1473 if self._debug_search:
1475 if self._debug_search:
1474 msg = (
1476 msg = (
1475 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1477 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1476 )
1478 )
1477 msg %= (deltainfo.base, deltainfo.deltalen)
1479 msg %= (deltainfo.base, deltainfo.deltalen)
1478 self._write_debug(msg)
1480 self._write_debug(msg)
1479 # if we already found a good delta,
1481 # if we already found a good delta,
1480 # challenge it against refined candidates
1482 # challenge it against refined candidates
1481 nominateddeltas.append(deltainfo)
1483 nominateddeltas.append(deltainfo)
1482 for candidaterev in candidaterevs:
1484 for candidaterev in candidaterevs:
1483 if self._debug_search:
1485 if self._debug_search:
1484 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1486 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1485 msg %= candidaterev
1487 msg %= candidaterev
1486 self._write_debug(msg)
1488 self._write_debug(msg)
1487 candidate_type = None
1489 candidate_type = None
1488 if candidaterev == p1r:
1490 if candidaterev == p1r:
1489 candidate_type = b"p1"
1491 candidate_type = b"p1"
1490 elif candidaterev == p2r:
1492 elif candidaterev == p2r:
1491 candidate_type = b"p2"
1493 candidate_type = b"p2"
1492 elif self.revlog.issnapshot(candidaterev):
1494 elif self.revlog.issnapshot(candidaterev):
1493 candidate_type = b"snapshot-%d"
1495 candidate_type = b"snapshot-%d"
1494 candidate_type %= self.revlog.snapshotdepth(
1496 candidate_type %= self.revlog.snapshotdepth(
1495 candidaterev
1497 candidaterev
1496 )
1498 )
1497
1499
1498 if candidate_type is not None:
1500 if candidate_type is not None:
1499 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1501 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1500 msg %= candidate_type
1502 msg %= candidate_type
1501 self._write_debug(msg)
1503 self._write_debug(msg)
1502 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1504 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1503 msg %= self.revlog.length(candidaterev)
1505 msg %= self.revlog.length(candidaterev)
1504 self._write_debug(msg)
1506 self._write_debug(msg)
1505 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1507 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1506 msg %= self.revlog.deltaparent(candidaterev)
1508 msg %= self.revlog.deltaparent(candidaterev)
1507 self._write_debug(msg)
1509 self._write_debug(msg)
1508
1510
1509 dbg_try_count += 1
1511 dbg_try_count += 1
1510
1512
1511 if self._debug_search:
1513 if self._debug_search:
1512 delta_start = util.timer()
1514 delta_start = util.timer()
1513 candidatedelta = self._builddeltainfo(
1515 candidatedelta = self._builddeltainfo(
1514 revinfo,
1516 revinfo,
1515 candidaterev,
1517 candidaterev,
1516 target_rev=target_rev,
1518 target_rev=target_rev,
1517 )
1519 )
1518 if self._debug_search:
1520 if self._debug_search:
1519 delta_end = util.timer()
1521 delta_end = util.timer()
1520 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1522 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1521 msg %= delta_end - delta_start
1523 msg %= delta_end - delta_start
1522 self._write_debug(msg)
1524 self._write_debug(msg)
1523 if candidatedelta is not None:
1525 if candidatedelta is not None:
1524 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1526 if search.is_good_delta_info(candidatedelta):
1525 if self._debug_search:
1527 if self._debug_search:
1526 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1528 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1527 msg %= candidatedelta.deltalen
1529 msg %= candidatedelta.deltalen
1528 self._write_debug(msg)
1530 self._write_debug(msg)
1529 nominateddeltas.append(candidatedelta)
1531 nominateddeltas.append(candidatedelta)
1530 elif self._debug_search:
1532 elif self._debug_search:
1531 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1533 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1532 msg %= candidatedelta.deltalen
1534 msg %= candidatedelta.deltalen
1533 self._write_debug(msg)
1535 self._write_debug(msg)
1534 elif self._debug_search:
1536 elif self._debug_search:
1535 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1537 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1536 self._write_debug(msg)
1538 self._write_debug(msg)
1537 if nominateddeltas:
1539 if nominateddeltas:
1538 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1540 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1539 if deltainfo is not None:
1541 if deltainfo is not None:
1540 candidaterevs = groups.send(deltainfo.base)
1542 candidaterevs = groups.send(deltainfo.base)
1541 else:
1543 else:
1542 candidaterevs = next(groups)
1544 candidaterevs = next(groups)
1543
1545
1544 if deltainfo is None:
1546 if deltainfo is None:
1545 dbg_type = b"full"
1547 dbg_type = b"full"
1546 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1548 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1547 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1549 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1548 dbg_type = b"snapshot"
1550 dbg_type = b"snapshot"
1549 else:
1551 else:
1550 dbg_type = b"delta"
1552 dbg_type = b"delta"
1551
1553
1552 if gather_debug:
1554 if gather_debug:
1553 end = util.timer()
1555 end = util.timer()
1554 if dbg_type == b'full':
1556 if dbg_type == b'full':
1555 used_cached = (
1557 used_cached = (
1556 cachedelta is not None
1558 cachedelta is not None
1557 and dbg_try_rounds == 0
1559 and dbg_try_rounds == 0
1558 and dbg_try_count == 0
1560 and dbg_try_count == 0
1559 and cachedelta[0] == nullrev
1561 and cachedelta[0] == nullrev
1560 )
1562 )
1561 else:
1563 else:
1562 used_cached = (
1564 used_cached = (
1563 cachedelta is not None
1565 cachedelta is not None
1564 and dbg_try_rounds == 1
1566 and dbg_try_rounds == 1
1565 and dbg_try_count == 1
1567 and dbg_try_count == 1
1566 and deltainfo.base == cachedelta[0]
1568 and deltainfo.base == cachedelta[0]
1567 )
1569 )
1568 dbg['duration'] = end - start
1570 dbg['duration'] = end - start
1569 dbg[
1571 dbg[
1570 'delta-base'
1572 'delta-base'
1571 ] = deltainfo.base # pytype: disable=attribute-error
1573 ] = deltainfo.base # pytype: disable=attribute-error
1572 dbg['search_round_count'] = dbg_try_rounds
1574 dbg['search_round_count'] = dbg_try_rounds
1573 dbg['using-cached-base'] = used_cached
1575 dbg['using-cached-base'] = used_cached
1574 dbg['delta_try_count'] = dbg_try_count
1576 dbg['delta_try_count'] = dbg_try_count
1575 dbg['type'] = dbg_type
1577 dbg['type'] = dbg_type
1576 if (
1578 if (
1577 deltainfo.snapshotdepth # pytype: disable=attribute-error
1579 deltainfo.snapshotdepth # pytype: disable=attribute-error
1578 is not None
1580 is not None
1579 ):
1581 ):
1580 dbg[
1582 dbg[
1581 'snapshot-depth'
1583 'snapshot-depth'
1582 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1584 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1583 else:
1585 else:
1584 dbg['snapshot-depth'] = 0
1586 dbg['snapshot-depth'] = 0
1585 self._dbg_process_data(dbg)
1587 self._dbg_process_data(dbg)
1586 return deltainfo
1588 return deltainfo
1587
1589
1588 def _one_dbg_data(self):
1590 def _one_dbg_data(self):
1589 dbg = {
1591 dbg = {
1590 'duration': None,
1592 'duration': None,
1591 'revision': None,
1593 'revision': None,
1592 'delta-base': None,
1594 'delta-base': None,
1593 'search_round_count': None,
1595 'search_round_count': None,
1594 'using-cached-base': None,
1596 'using-cached-base': None,
1595 'delta_try_count': None,
1597 'delta_try_count': None,
1596 'type': None,
1598 'type': None,
1597 'p1-chain-len': None,
1599 'p1-chain-len': None,
1598 'p2-chain-len': None,
1600 'p2-chain-len': None,
1599 'snapshot-depth': None,
1601 'snapshot-depth': None,
1600 'target-revlog': None,
1602 'target-revlog': None,
1601 }
1603 }
1602 target_revlog = b"UNKNOWN"
1604 target_revlog = b"UNKNOWN"
1603 target_type = self.revlog.target[0]
1605 target_type = self.revlog.target[0]
1604 target_key = self.revlog.target[1]
1606 target_key = self.revlog.target[1]
1605 if target_type == KIND_CHANGELOG:
1607 if target_type == KIND_CHANGELOG:
1606 target_revlog = b'CHANGELOG:'
1608 target_revlog = b'CHANGELOG:'
1607 elif target_type == KIND_MANIFESTLOG:
1609 elif target_type == KIND_MANIFESTLOG:
1608 target_revlog = b'MANIFESTLOG:'
1610 target_revlog = b'MANIFESTLOG:'
1609 if target_key:
1611 if target_key:
1610 target_revlog += b'%s:' % target_key
1612 target_revlog += b'%s:' % target_key
1611 elif target_type == KIND_FILELOG:
1613 elif target_type == KIND_FILELOG:
1612 target_revlog = b'FILELOG:'
1614 target_revlog = b'FILELOG:'
1613 if target_key:
1615 if target_key:
1614 target_revlog += b'%s:' % target_key
1616 target_revlog += b'%s:' % target_key
1615 dbg['target-revlog'] = target_revlog
1617 dbg['target-revlog'] = target_revlog
1616 return dbg
1618 return dbg
1617
1619
1618 def _dbg_process_data(self, dbg):
1620 def _dbg_process_data(self, dbg):
1619 if self._debug_info is not None:
1621 if self._debug_info is not None:
1620 self._debug_info.append(dbg)
1622 self._debug_info.append(dbg)
1621
1623
1622 if self._write_debug is not None:
1624 if self._write_debug is not None:
1623 msg = (
1625 msg = (
1624 b"DBG-DELTAS:"
1626 b"DBG-DELTAS:"
1625 b" %-12s"
1627 b" %-12s"
1626 b" rev=%d:"
1628 b" rev=%d:"
1627 b" delta-base=%d"
1629 b" delta-base=%d"
1628 b" is-cached=%d"
1630 b" is-cached=%d"
1629 b" - search-rounds=%d"
1631 b" - search-rounds=%d"
1630 b" try-count=%d"
1632 b" try-count=%d"
1631 b" - delta-type=%-6s"
1633 b" - delta-type=%-6s"
1632 b" snap-depth=%d"
1634 b" snap-depth=%d"
1633 b" - p1-chain-length=%d"
1635 b" - p1-chain-length=%d"
1634 b" p2-chain-length=%d"
1636 b" p2-chain-length=%d"
1635 b" - duration=%f"
1637 b" - duration=%f"
1636 b"\n"
1638 b"\n"
1637 )
1639 )
1638 msg %= (
1640 msg %= (
1639 dbg["target-revlog"],
1641 dbg["target-revlog"],
1640 dbg["revision"],
1642 dbg["revision"],
1641 dbg["delta-base"],
1643 dbg["delta-base"],
1642 dbg["using-cached-base"],
1644 dbg["using-cached-base"],
1643 dbg["search_round_count"],
1645 dbg["search_round_count"],
1644 dbg["delta_try_count"],
1646 dbg["delta_try_count"],
1645 dbg["type"],
1647 dbg["type"],
1646 dbg["snapshot-depth"],
1648 dbg["snapshot-depth"],
1647 dbg["p1-chain-len"],
1649 dbg["p1-chain-len"],
1648 dbg["p2-chain-len"],
1650 dbg["p2-chain-len"],
1649 dbg["duration"],
1651 dbg["duration"],
1650 )
1652 )
1651 self._write_debug(msg)
1653 self._write_debug(msg)
1652
1654
1653
1655
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, body = deltainfo.data
    mode = COMP_MODE_INLINE
    if not header:
        if not body:
            # nothing to store at all... declare it uncompressed
            mode = COMP_MODE_PLAIN
        else:
            marker = body[0:1]
            if marker == b'\0':
                # leading NUL byte marks stored-as-plain data
                mode = COMP_MODE_PLAIN
            elif marker == default_compression_header:
                mode = COMP_MODE_DEFAULT
    elif header == b'u':
        # 'u' prefix means uncompressed; the explicit PLAIN mode is a more
        # efficient way to declare that, so strip the prefix byte.
        header = b''
        mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now