delta-find: move sparse-revlog pre-filtering in the associated class...
marmoute - r52252:a224ce56 default
@@ -1,1881 +1,1887 @@
# revlogdeltas.py - Logic around delta computation for revlog
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""


import abc
import collections
import struct

# import stuff from node for others to import from revlog
from ..node import nullrev
from ..i18n import _

from .constants import (
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_FORCE,
    DELTA_BASE_REUSE_NO,
    KIND_CHANGELOG,
    KIND_FILELOG,
    KIND_MANIFESTLOG,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)

from ..thirdparty import attr

from .. import (
    error,
    mdiff,
    util,
)

from . import flagutil

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2
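
# Illustrative note (not part of this changeset): this ratio is applied in
# _is_good_delta_info_chain_quality below, roughly as
#
#     max_chain_data = textlen * LIMIT_DELTA2TEXT
#
# so a 1000-byte revision tolerates at most 2000 bytes of cumulated deltas
# on its chain.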


class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is a list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        if rev == nullrev:
            return 0
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        if rev == nullrev:
            return True
        return rev in self._snapshot


def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If an individual revision chunk is larger than this limit, it will still
    be yielded individually.

    >>> data = [
    ...     5, #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs,
        revlog.data_config.sr_density_threshold,
        revlog.data_config.sr_min_gap_size,
    ):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk


def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of the revlog.
    This might happen when the "minimal gap size" interrupted the slicing or
    when chains are built in a way that creates large blocks next to each
    other.

    >>> data = [
    ...     3, #0 (3)
    ...     5, #1 (2)
    ...     6, #2 (1)
    ...     8, #3 (2)
    ...     8, #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk


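# Worked example (sketch, not part of the module): with the doctest data
# above and targetsize=3, slicing [0, 1, 2, 3, 4, 5, 6, 7, 8] starts from a
# 14-byte span, repeatedly halves the candidate end index until the span
# fits, yields the chunk, then restarts from the cut point. That is how the
# doctest result [[0], [1, 2], [3], [5], [6, 7, 8]] is reached without
# probing every possible boundary.

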
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5, #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps so they can be sorted and popped from largest to
    # smallest
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # removing the gap from the amount of read data shrinks the span,
        # which increases the density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk


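# Worked example (sketch, not part of the module): for revs
# [1, 2, 3, 5, 8, 10, 11, 14] of the doctest revlog above, the payload is
# 5+2+0+4+5+1+3+1 = 21 bytes over a span of 86 - 5 = 81 bytes, a density of
# ~0.26. Dropping the largest gap (34 bytes between revs 11 and 14) brings
# the read down to 47 bytes (density ~0.45), and dropping the next one
# (15 bytes between revs 2 and 5) brings it to 32 bytes (density ~0.66),
# above the 0.5 threshold; cutting at those two gaps yields
# [[1, 2], [5, 8, 10, 11], [14]].

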
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...     5, #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ...     21, #5
    ...     21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (
            endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
        ):
            endidx -= 1

    return revs[startidx:endidx]


def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5, #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])


def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext


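# Sketch (an assumption about mdiff.replacediffheader's layout, not stated
# here): a delta replacing the entire base is a single hunk covering the
# whole base text, so its header packs to struct.pack(b">lll", 0, baselen,
# newlen). When delta[:12] matches that header, the new full text is just
# delta[12:], and the (possibly censored) base never needs decoding.

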
@attr.s(slots=True, frozen=True)
class _deltainfo:
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()


def drop_u_compression(delta):
    """turn a "u" (no-compression) delta into no-compression without header

    This is useful for revlog formats that have a better compression method.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )


# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500

### stage of the search, used for debugging and to select and adjust some logic.
# initial stage, next step is unknown
_STAGE_UNSPECIFIED = "unspecified"
# trying the cached delta
_STAGE_CACHED = "cached"
# trying delta based on parents
_STAGE_PARENTS = "parents"
# trying to build a valid snapshot of any level
_STAGE_SNAPSHOT = "snapshot"
# trying to build a delta based on the previous revision
_STAGE_PREV = "prev"
# trying to build a full snapshot
_STAGE_FULL = "full"


class _BaseDeltaSearch(abc.ABC):
    """perform the search of a good delta for a single revlog revision

    note: some of the deltacomputer.finddeltainfo logic should probably move
    here.
    """

    def __init__(
        self,
        revlog,
        revinfo,
        p1,
        p2,
        cachedelta,
        excluded_bases=None,
        target_rev=None,
        snapshot_cache=None,
    ):
        # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
        # so we should never end up asking such a question. Adding the assert
        # as a safe-guard to detect anything that would be fishy in this
        # regard.
        assert (
            cachedelta is None
            or cachedelta[2] != DELTA_BASE_REUSE_FORCE
            or not revlog.delta_config.general_delta
        )
        self.revlog = revlog
        self.revinfo = revinfo
        self.textlen = revinfo.textlen
        self.p1 = p1
        self.p2 = p2
        self.cachedelta = cachedelta
        self.excluded_bases = excluded_bases
        if target_rev is None:
            target_rev = len(self.revlog)
        self.target_rev = target_rev
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        self.snapshot_cache = snapshot_cache

        self.tested = {nullrev}

        self.current_stage = _STAGE_UNSPECIFIED
        self.current_group = None
        self._init_group()

    def is_good_delta_info(self, deltainfo):
        """Returns True if the given delta is good.

        Good means that it is within the disk span, disk size, and chain length
        bounds that we know to be performant.
        """
        if not self._is_good_delta_info_universal(deltainfo):
            return False
        if not self._is_good_delta_info_chain_quality(deltainfo):
            return False
        return True

    def _is_good_delta_info_universal(self, deltainfo):
        """Returns True if the given delta is good.

        This performs generic checks needed by all format variants.

        This is used by is_good_delta_info.
        """

        if deltainfo is None:
            return False

        # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
        # so we should never end up asking such a question. Adding the assert
        # as a safe-guard to detect anything that would be fishy in this
        # regard.
        assert (
            self.revinfo.cachedelta is None
            or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
            or not self.revlog.delta_config.general_delta
        )

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the delta
        # will be inefficient.
        if self.revinfo.textlen < deltainfo.deltalen:
            return False

        return True

    def _is_good_delta_info_chain_quality(self, deltainfo):
        """Returns True if the chain associated with the delta is good.

        This performs checks for formats that use delta chains.

        This is used by is_good_delta_info.
        """
        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.

        defaultmax = self.revinfo.textlen * 4
        maxdist = self.revlog.delta_config.max_deltachain_span
        if not maxdist:
            maxdist = deltainfo.distance  # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        #
        # In the sparse-revlog case, we rely on the associated "sparse
        # reading" to avoid issues related to the span of data. In theory, it
        # would be possible to build a pathological revlog where delta
        # patterns would lead to too many reads. However, they do not happen
        # in practice at all. So we skip the span check entirely.
        if (
            not self.revlog.delta_config.sparse_revlog
            and maxdist < deltainfo.distance
        ):
            return False

        # Bad delta from cumulated payload size:
        #
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.
        max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
        # If the sum of the deltas gets larger than K * the target text length.
        if max_chain_data < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of deltas in the chain gets too high.
        if (
            self.revlog.delta_config.max_chain_len
            and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
        ):
            return False
        return True

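    # Worked example (sketch, not part of the changeset): for a 1000-byte
    # revision, the read span may not exceed max(max_deltachain_span,
    # 4 * 1000) bytes (and is not checked at all for sparse revlogs), the
    # cumulated delta payload may not exceed 2 * 1000 bytes
    # (LIMIT_DELTA2TEXT), and the chain length may not exceed
    # delta_config.max_chain_len when one is configured.
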
    @property
    def done(self):
        """True when all possible candidates have been tested"""
        return self.current_group is None

    @abc.abstractmethod
    def next_group(self, good_delta=None):
        """move to the next group to test

        The group of revisions to test will be available in
        `self.current_group`. If the previous group had any good delta, the
        best one can be passed as the `good_delta` parameter to help selecting
        the next group.

        If no revision remains to be tested, `self.done` will be True and
        `self.current_group` will be None.
        """
        pass

    @abc.abstractmethod
    def _init_group(self):
        pass


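# Sketch (an assumed driving loop, inferred from the API above rather than
# taken from this changeset): deltacomputer.finddeltainfo walks the
# candidate groups roughly like this:
#
#     search = _GeneralDeltaSearch(revlog, revinfo, p1, p2, cachedelta)
#     best = None
#     while not search.done:
#         for candidate_rev in search.current_group:
#             # build a delta against candidate_rev and keep it when
#             # search.is_good_delta_info(...) accepts it
#             ...
#         search.next_group(best)

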
class _NoDeltaSearch(_BaseDeltaSearch):
    """Search for no delta.

    This search variant is to be used in cases where we should not store
    deltas.
    """

    def _init_group(self):
        self.current_stage = _STAGE_FULL

    def next_group(self, good_delta=None):
        pass


class _PrevDeltaSearch(_BaseDeltaSearch):
    """Search for a delta against the previous revision only

    This search variant is to be used when the format does not allow for
    deltas against arbitrary bases.
    """

    def _init_group(self):
        self.current_stage = _STAGE_PREV
        self.current_group = [self.target_rev - 1]
        self.tested.update(self.current_group)

    def next_group(self, good_delta=None):
        self.current_stage = _STAGE_FULL
        self.current_group = None


class _GeneralDeltaSearch(_BaseDeltaSearch):
    """Delta search variant for general-delta repository"""

    def _init_group(self):
        # Why search for a delta base if we cannot use a delta base?
        # also see issue6056
        assert self.revlog.delta_config.general_delta
        self._candidates_iterator = self._iter_groups()
        self._last_good = None
        if (
            self.cachedelta is not None
            and self.cachedelta[2] > DELTA_BASE_REUSE_NO
            and self._pre_filter_rev(self.cachedelta[0])
        ):
            # First we try to reuse the delta contained in the bundle (or
            # from the source revlog).
            #
            # This logic only applies to general delta repositories and can be
            # disabled through configuration. Disabling reuse of a source
            # delta is useful when we want to make sure we recomputed
            # "optimal" deltas.
            self.current_stage = _STAGE_CACHED
            self._internal_group = (self.cachedelta[0],)
            self._internal_idx = 0
            self.current_group = self._internal_group
            self.tested.update(self.current_group)
        else:
            self._next_internal_group()

    def _next_internal_group(self):
        # self._internal_group can be larger than self.current_group
        self._internal_idx = 0
        group = self._candidates_iterator.send(self._last_good)
        if group is not None:
            group = self._pre_filter_candidate_revs(group)
        self._internal_group = group
        if self._internal_group is None:
            self.current_group = None
        elif len(self._internal_group) == 0:
            self.next_group()
        else:
            chunk_size = self.revlog.delta_config.candidate_group_chunk_size
            if chunk_size > 0:
                self.current_group = self._internal_group[:chunk_size]
                self._internal_idx += chunk_size
            else:
                self.current_group = self._internal_group
                self._internal_idx += len(self.current_group)

            self.tested.update(self.current_group)

    def next_group(self, good_delta=None):
        old_good = self._last_good
        if good_delta is not None:
            self._last_good = good_delta.base
        if self.current_stage == _STAGE_CACHED and good_delta is not None:
            # the cache is good, let us use the cache as requested
            self._candidates_iterator = None
            self._internal_group = None
            self._internal_idx = None
            self.current_group = None
            return

        if (self._internal_idx < len(self._internal_group)) and (
            old_good != good_delta
        ):
            # When the size of the candidate group is big, it can result in
            # a quite significant performance impact. To reduce this, we
            # can send them in smaller batches until the new batch does not
            # provide any improvements.
            #
            # This might reduce the overall efficiency of the compression
            # in some corner cases, but that should also prevent very
            # pathological cases from being an issue. (e.g. 20,000
            # candidates).
            #
            # XXX note that the ordering of the group becomes important as
            # it now impacts the final result. The current order is
            # unprocessed and can be improved.
            next_idx = self._internal_idx + self._group_chunk_size
            self.current_group = self._internal_group[
                self._internal_idx : next_idx
            ]
            self.tested.update(self.current_group)
            self._internal_idx = next_idx
        else:
            self._next_internal_group()

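    # Illustrative note (sketch, not part of the changeset): with
    # candidate_group_chunk_size = 16 and an internal group of 40
    # candidates, next_group() serves slices [0:16], [16:32] and [32:40],
    # and falls through to the next internal group as soon as a batch fails
    # to improve on the previous best delta.
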
    def _pre_filter_candidate_revs(self, temptative):
        """filter possible candidates before computing a delta

        This function uses various criteria to pre-filter candidate delta
        bases before we compute a delta and evaluate its quality.

        Such pre-filtering limits the number of computed deltas, an expensive
        operation.

        return the updated list of revisions to test
        """
        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        tested = self.tested
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            if self._pre_filter_rev(rev):
                group.append(rev)
            else:
                self.tested.add(rev)
        return group

    def _pre_filter_rev_universal(self, rev):
        """pre filtering that is needed in all cases.

        return True if it seems okay to test a rev, False otherwise.

        used by _pre_filter_rev.
        """
        # no need to try a delta against nullrev, this will be done as
        # a last resort.
        if rev == nullrev:
            return False
        # filter out revisions we tested already
        if rev in self.tested:
            return False

        # a higher authority deemed the base unworthy (e.g. censored)
        if self.excluded_bases is not None and rev in self.excluded_bases:
            return False
        # We are in some recomputation cases and that rev is too high
        # in the revlog
        if self.target_rev is not None and rev >= self.target_rev:
            return False
        # no delta for rawtext-changing revs (see "candelta" for why)
        if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return False
        return True

941 def _pre_filter_rev_sparse(self, rev):
942 """pre filtering that is needed in sparse revlog cases
943
944 return True if it seems okay to test a rev, False otherwise.
945
946 used by _pre_filter_rev.
947 """
948 assert self.revlog.delta_config.sparse_revlog
949 # if the revision we test against is too small, the resulting delta
950 # will be large anyway as the amount of data to be added is big
951 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
952 return False
953
954 if self.revlog.delta_config.upper_bound_comp is not None:
955 maxcomp = self.revlog.delta_config.upper_bound_comp
956 basenotsnap = (self.p1, self.p2, nullrev)
957 if rev not in basenotsnap and self.revlog.issnapshot(rev):
958 snapshotdepth = self.revlog.snapshotdepth(rev)
959 # If text is significantly larger than the base, we can
960 # expect the resulting delta to be proportional to the size
961 # difference
962 revsize = self.revlog.rawsize(rev)
963 rawsizedistance = max(self.textlen - revsize, 0)
964 # use an estimate of the compression upper bound.
965 lowestrealisticdeltalen = rawsizedistance // maxcomp
966
967 # check the absolute constraint on the delta size
968 snapshotlimit = self.textlen >> snapshotdepth
969 if snapshotlimit < lowestrealisticdeltalen:
970 # delta lower bound is larger than accepted upper
971 # bound
972 return False
973
974 # check the relative constraint on the delta size
975 revlength = self.revlog.length(rev)
976 if revlength < lowestrealisticdeltalen:
977 # delta probable lower bound is larger than target
978 # base
979 return False
980 return True
981
982 def _pre_filter_rev_delta_chain(self, rev):
941 def _pre_filter_rev_delta_chain(self, rev):
983 """pre filtering that is needed in sparse revlog cases
942 """pre filtering that is needed in sparse revlog cases
984
943
985 return True if it seems okay to test a rev, False otherwise.
944 return True if it seems okay to test a rev, False otherwise.
986
945
987 used by _pre_filter_rev.
946 used by _pre_filter_rev.
988 """
947 """
989 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
948 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
990 # filter out delta base that will never produce good delta
949 # filter out delta base that will never produce good delta
991 #
950 #
992 # if the delta of that base is already bigger than the limit
951 # if the delta of that base is already bigger than the limit
993 # for the delta chain size, doing a delta is hopeless.
952 # for the delta chain size, doing a delta is hopeless.
994 if deltas_limit < self.revlog.length(rev):
953 if deltas_limit < self.revlog.length(rev):
995 return False
954 return False
996
955
997 # If we reach here, we are about to build and test a delta.
956 # If we reach here, we are about to build and test a delta.
998 # The delta building process will compute the chaininfo in all
957 # The delta building process will compute the chaininfo in all
999 # cases; since that computation is cached, it is fine to access
958 # cases; since that computation is cached, it is fine to access
1000 # it here too.
959 # it here too.
1001 chainlen, chainsize = self.revlog._chaininfo(rev)
960 chainlen, chainsize = self.revlog._chaininfo(rev)
1002 # if chain will be too long, skip base
961 # if chain will be too long, skip base
1003 if (
962 if (
1004 self.revlog.delta_config.max_chain_len
963 self.revlog.delta_config.max_chain_len
1005 and chainlen >= self.revlog.delta_config.max_chain_len
964 and chainlen >= self.revlog.delta_config.max_chain_len
1006 ):
965 ):
1007 return False
966 return False
1008 # if the chain already has too much data, skip base
967 # if the chain already has too much data, skip base
1009 if deltas_limit < chainsize:
968 if deltas_limit < chainsize:
1010 return False
969 return False
1011 return True
970 return True
1012
971
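The chain checks above combine one per-candidate bound and two cumulative ones. A small numeric sketch of how the three tests interact, with every value invented:

LIMIT_DELTA2TEXT = 2
textlen = 1_000                    # fulltext size of the new revision
deltas_limit = textlen * LIMIT_DELTA2TEXT   # 2_000 bytes of chain budget

candidate_delta_len = 600          # revlog.length(rev) for the candidate
chainlen, chainsize = 3, 1_500     # revlog._chaininfo(rev) (cached)
max_chain_len = 4                  # assumed delta_config.max_chain_len

acceptable = (
    candidate_delta_len <= deltas_limit   # its own delta fits the budget
    and chainlen < max_chain_len          # the chain stays short enough
    and chainsize <= deltas_limit         # accumulated chain data fits too
)
assert acceptable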
1013 def _pre_filter_rev(self, rev):
972 def _pre_filter_rev(self, rev):
1014 """return True if it seems okay to test a rev, False otherwise"""
973 """return True if it seems okay to test a rev, False otherwise"""
1015 if not self._pre_filter_rev_universal(rev):
974 if not self._pre_filter_rev_universal(rev):
1016 return False
975 return False
1017 if not self._pre_filter_rev_delta_chain(rev):
976 if not self._pre_filter_rev_delta_chain(rev):
1018 return False
977 return False
1019 if self.revlog.delta_config.sparse_revlog:
1020 if not self._pre_filter_rev_sparse(rev):
1021 return False
1022
1023 return True
978 return True
1024
979
1025 def _iter_parents(self):
980 def _iter_parents(self):
1026 # exclude bases that were already lazily tested, if any
981 # exclude bases that were already lazily tested, if any
1027 parents = [p for p in (self.p1, self.p2) if p != nullrev]
982 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1028
983
1029 self.current_stage = _STAGE_PARENTS
984 self.current_stage = _STAGE_PARENTS
1030 if (
985 if (
1031 not self.revlog.delta_config.delta_both_parents
986 not self.revlog.delta_config.delta_both_parents
1032 and len(parents) == 2
987 and len(parents) == 2
1033 ):
988 ):
1034 parents.sort()
989 parents.sort()
1035 # To minimize the chance of having to build a fulltext,
990 # To minimize the chance of having to build a fulltext,
1036 # pick first whichever parent is closest to us (max rev)
991 # pick first whichever parent is closest to us (max rev)
1037 yield (parents[1],)
992 yield (parents[1],)
1038 # then the other one (min rev) if the first did not fit
993 # then the other one (min rev) if the first did not fit
1039 yield (parents[0],)
994 yield (parents[0],)
1040 elif len(parents) > 0:
995 elif len(parents) > 0:
1041 # Test all parents (1 or 2), and keep the best candidate
996 # Test all parents (1 or 2), and keep the best candidate
1042 yield parents
997 yield parents
1043
998
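Restating the parent-ordering policy above as a runnable snippet (rev numbers invented; `delta_both_parents` mirrors the config flag):

nullrev = -1
p1, p2 = 8, 3
parents = sorted(p for p in (p1, p2) if p != nullrev)

delta_both_parents = False
if not delta_both_parents and len(parents) == 2:
    groups = [(parents[1],), (parents[0],)]   # closest parent (rev 8) first
else:
    groups = [tuple(parents)]                 # test all parents in one round

assert groups == [(8,), (3,)]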
1044 def _iter_prev(self):
999 def _iter_prev(self):
1045 # other approaches failed; try against prev to hopefully save us a
1000 # other approaches failed; try against prev to hopefully save us a
1046 # fulltext.
1001 # fulltext.
1047 self.current_stage = _STAGE_PREV
1002 self.current_stage = _STAGE_PREV
1048 yield (self.target_rev - 1,)
1003 yield (self.target_rev - 1,)
1049
1004
1050 def _iter_groups(self):
1005 def _iter_groups(self):
1051 good = None
1006 good = None
1052 for group in self._iter_parents():
1007 for group in self._iter_parents():
1053 good = yield group
1008 good = yield group
1054 if good is not None:
1009 if good is not None:
1055 break
1010 break
1056 else:
1011 else:
1057 assert good is None
1012 assert good is None
1058 yield from self._iter_prev()
1013 yield from self._iter_prev()
1059 yield None
1014 yield None
1060
1015
1061
1016
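`_iter_groups` is driven as a coroutine: the caller `send()`s back the base it accepted, and the generator uses that answer to decide what to offer next. A stripped-down model of that protocol, not the real search code (rev numbers invented):

def iter_groups():
    # first round: both parents (invented revs 5 and 4)
    good = yield (5, 4)
    if good is None:
        # parents failed: fall back to `prev` (invented rev 6)
        good = yield (6,)
    yield None  # signal exhaustion to the caller

search = iter_groups()
group = search.send(None)      # priming call returns the first group
while group is not None:
    # pretend only rev 4 produced an acceptable delta
    good = 4 if 4 in group else None
    group = search.send(good)  # the answer steers the generator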
1062 class _SparseDeltaSearch(_GeneralDeltaSearch):
1017 class _SparseDeltaSearch(_GeneralDeltaSearch):
1063 """Delta search variants for sparse-revlog"""
1018 """Delta search variants for sparse-revlog"""
1064
1019
1065 def is_good_delta_info(self, deltainfo):
1020 def is_good_delta_info(self, deltainfo):
1066 """Returns True if the given delta is good.
1021 """Returns True if the given delta is good.
1067
1022
1068 Good means that it is within the disk span, disk size, and chain length
1023 Good means that it is within the disk span, disk size, and chain length
1069 bounds that we know to be performant.
1024 bounds that we know to be performant.
1070 """
1025 """
1071 if not self._is_good_delta_info_universal(deltainfo):
1026 if not self._is_good_delta_info_universal(deltainfo):
1072 return False
1027 return False
1073 if not self._is_good_delta_info_chain_quality(deltainfo):
1028 if not self._is_good_delta_info_chain_quality(deltainfo):
1074 return False
1029 return False
1075 if not self._is_good_delta_info_snapshot_constraints(deltainfo):
1030 if not self._is_good_delta_info_snapshot_constraints(deltainfo):
1076 return False
1031 return False
1077 return True
1032 return True
1078
1033
1079 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
1034 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
1080 """Returns True if the chain associated with snapshots
1035 """Returns True if the chain associated with snapshots
1081
1036
1082 This performs checks for formats that use sparse-revlog and intermediate
1037 This performs checks for formats that use sparse-revlog and intermediate
1083 snapshots.
1038 snapshots.
1084
1039
1085 This is used by is_good_delta_info.
1040 This is used by is_good_delta_info.
1086 """
1041 """
1087 # if not a snapshot, this method has no filtering to do
1042 # if not a snapshot, this method has no filtering to do
1088 if deltainfo.snapshotdepth is None:
1043 if deltainfo.snapshotdepth is None:
1089 return True
1044 return True
1090 # bad delta from intermediate snapshot size limit
1045 # bad delta from intermediate snapshot size limit
1091 #
1046 #
1092 # If an intermediate snapshot size is higher than the limit, reject
1047 # If an intermediate snapshot size is higher than the limit, reject
1093 # it. The limit exists to prevent endless chains of intermediate
1048 # it. The limit exists to prevent endless chains of intermediate
1094 # deltas from being created.
1049 # deltas from being created.
1095 if (
1050 if (
1096 self.revinfo.textlen >> deltainfo.snapshotdepth
1051 self.revinfo.textlen >> deltainfo.snapshotdepth
1097 ) < deltainfo.deltalen:
1052 ) < deltainfo.deltalen:
1098 return False
1053 return False
1099
1054
1100 # bad delta if new intermediate snapshot is larger than the previous
1055 # bad delta if new intermediate snapshot is larger than the previous
1101 # snapshot
1056 # snapshot
1102 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
1057 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
1103 return False
1058 return False
1104
1059
1105 return True
1060 return True
1106
1061
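The two snapshot constraints above reduce to simple arithmetic. A worked example with invented sizes:

textlen = 64_000        # fulltext size of the incoming revision (invented)
snapshotdepth = 2       # depth the new intermediate snapshot would have
deltalen = 9_000        # compressed length of the candidate delta (invented)
base_length = 12_000    # revlog.length(deltainfo.base) (invented)

# a depth-d intermediate snapshot may use at most textlen >> d bytes
assert (textlen >> snapshotdepth) >= deltalen   # 16_000 >= 9_000
# and it must stay smaller than the snapshot it is built upon
assert base_length >= deltalen                  # 12_000 >= 9_000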
1062 def _pre_filter_rev(self, rev):
1063 """return True if it seems okay to test a rev, False otherwise"""
1064 if not self._pre_filter_rev_universal(rev):
1065 return False
1066 if not self._pre_filter_rev_delta_chain(rev):
1067 return False
1068 if not self._pre_filter_rev_sparse(rev):
1069 return False
1070 return True
1071
1072 def _pre_filter_rev_sparse(self, rev):
1073 """pre filtering that is needed in sparse revlog cases
1074
1075 return True if it seems okay to test a rev, False otherwise.
1076
1077 used by _pre_filter_rev.
1078 """
1079 assert self.revlog.delta_config.sparse_revlog
1080 # if the revision we test against is too small, the resulting delta
1081 # will be large anyway as the amount of data to be added is big
1082 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
1083 return False
1084
1085 if self.revlog.delta_config.upper_bound_comp is not None:
1086 maxcomp = self.revlog.delta_config.upper_bound_comp
1087 basenotsnap = (self.p1, self.p2, nullrev)
1088 if rev not in basenotsnap and self.revlog.issnapshot(rev):
1089 snapshotdepth = self.revlog.snapshotdepth(rev)
1090 # If text is significantly larger than the base, we can
1091 # expect the resulting delta to be proportional to the size
1092 # difference
1093 revsize = self.revlog.rawsize(rev)
1094 rawsizedistance = max(self.textlen - revsize, 0)
1095 # use an estimate of the compression upper bound.
1096 lowestrealisticdeltalen = rawsizedistance // maxcomp
1097
1098 # check the absolute constraint on the delta size
1099 snapshotlimit = self.textlen >> snapshotdepth
1100 if snapshotlimit < lowestrealisticdeltalen:
1101 # delta lower bound is larger than accepted upper
1102 # bound
1103 return False
1104
1105 # check the relative constraint on the delta size
1106 revlength = self.revlog.length(rev)
1107 if revlength < lowestrealisticdeltalen:
1108 # delta probable lower bound is larger than target
1109 # base
1110 return False
1111 return True
1112
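The compression upper bound used above can be made concrete with numbers. A hedged sketch, assuming `upper_bound_comp` is set to 10 and all sizes invented:

textlen = 200_000           # size of the revision being stored (invented)
revsize = 120_000           # rawsize of the candidate base (invented)
maxcomp = 10                # assumed delta_config.upper_bound_comp
snapshotdepth = 3           # depth of the candidate snapshot (invented)

rawsizedistance = max(textlen - revsize, 0)           # 80_000
# an optimistic lower bound on the delta size, assuming maxcomp-to-1
# compression of the raw size difference
lowestrealisticdeltalen = rawsizedistance // maxcomp  # 8_000

# absolute constraint: a depth-d snapshot may use at most textlen >> d
snapshotlimit = textlen >> snapshotdepth              # 25_000
assert snapshotlimit >= lowestrealisticdeltalen       # candidate survives

# relative constraint: the delta should not outgrow the candidate base
revlength = 9_000           # revlog.length(rev) for the candidate (invented)
assert revlength >= lowestrealisticdeltalen           # candidate survives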
1107 def _iter_snapshots_base(self):
1113 def _iter_snapshots_base(self):
1108 assert self.revlog.delta_config.sparse_revlog
1114 assert self.revlog.delta_config.sparse_revlog
1109 assert self.current_stage == _STAGE_SNAPSHOT
1115 assert self.current_stage == _STAGE_SNAPSHOT
1110 prev = self.target_rev - 1
1116 prev = self.target_rev - 1
1111 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
1117 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
1112
1118
1113 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1119 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1114 if not parents:
1120 if not parents:
1115 return
1121 return
1116 # See if an existing snapshot in the parent chains can serve as
1122 # See if an existing snapshot in the parent chains can serve as
1117 # the base for a new intermediate snapshot
1123 # the base for a new intermediate snapshot
1118 #
1124 #
1119 # search for snapshots in the parents' delta chains, building a
1125 # search for snapshots in the parents' delta chains, building a
1120 # map: snapshot-level -> snapshot-revs
1126 # map: snapshot-level -> snapshot-revs
1121 parents_snaps = collections.defaultdict(set)
1127 parents_snaps = collections.defaultdict(set)
1122 candidate_chains = [deltachain(p) for p in parents]
1128 candidate_chains = [deltachain(p) for p in parents]
1123 for chain in candidate_chains:
1129 for chain in candidate_chains:
1124 for idx, s in enumerate(chain):
1130 for idx, s in enumerate(chain):
1125 if not self.revlog.issnapshot(s):
1131 if not self.revlog.issnapshot(s):
1126 break
1132 break
1127 parents_snaps[idx].add(s)
1133 parents_snaps[idx].add(s)
1128 snapfloor = min(parents_snaps[0]) + 1
1134 snapfloor = min(parents_snaps[0]) + 1
1129 self.snapshot_cache.update(self.revlog, snapfloor)
1135 self.snapshot_cache.update(self.revlog, snapfloor)
1130 # search for the highest "unrelated" revision
1136 # search for the highest "unrelated" revision
1131 #
1137 #
1132 # Adding snapshots used by "unrelated" revisions increases the odds
1138 # Adding snapshots used by "unrelated" revisions increases the odds
1133 # that we reuse an independent, yet better snapshot chain.
1139 # that we reuse an independent, yet better snapshot chain.
1134 #
1140 #
1135 # XXX instead of building a set of revisions, we could lazily
1141 # XXX instead of building a set of revisions, we could lazily
1136 # enumerate over the chains. That would be more efficient, however
1142 # enumerate over the chains. That would be more efficient, however
1137 # we stick to simple code for now.
1143 # we stick to simple code for now.
1138 all_revs = set()
1144 all_revs = set()
1139 for chain in candidate_chains:
1145 for chain in candidate_chains:
1140 all_revs.update(chain)
1146 all_revs.update(chain)
1141 other = None
1147 other = None
1142 for r in self.revlog.revs(prev, snapfloor):
1148 for r in self.revlog.revs(prev, snapfloor):
1143 if r not in all_revs:
1149 if r not in all_revs:
1144 other = r
1150 other = r
1145 break
1151 break
1146 if other is not None:
1152 if other is not None:
1147 # To avoid unfair competition, we won't use unrelated
1153 # To avoid unfair competition, we won't use unrelated
1148 # intermediate snapshots that are deeper than the ones from the
1154 # intermediate snapshots that are deeper than the ones from the
1149 # parent delta chain.
1155 # parent delta chain.
1150 max_depth = max(parents_snaps.keys())
1156 max_depth = max(parents_snaps.keys())
1151 chain = deltachain(other)
1157 chain = deltachain(other)
1152 for depth, s in enumerate(chain):
1158 for depth, s in enumerate(chain):
1153 if s < snapfloor:
1159 if s < snapfloor:
1154 continue
1160 continue
1155 if max_depth < depth:
1161 if max_depth < depth:
1156 break
1162 break
1157 if not self.revlog.issnapshot(s):
1163 if not self.revlog.issnapshot(s):
1158 break
1164 break
1159 parents_snaps[depth].add(s)
1165 parents_snaps[depth].add(s)
1160 # Test them as possible intermediate snapshot bases. We test them
1166 # Test them as possible intermediate snapshot bases. We test them
1161 # from highest to lowest level; high-level ones are more likely to
1167 # from highest to lowest level; high-level ones are more likely to
1162 # result in a small delta
1168 # result in a small delta
1163 floor = None
1169 floor = None
1164 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1170 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1165 siblings = set()
1171 siblings = set()
1166 for s in snaps:
1172 for s in snaps:
1167 siblings.update(self.snapshot_cache.snapshots[s])
1173 siblings.update(self.snapshot_cache.snapshots[s])
1168 # Before considering making a new intermediate snapshot, we
1174 # Before considering making a new intermediate snapshot, we
1169 # check if an existing snapshot, a child of the base we consider,
1175 # check if an existing snapshot, a child of the base we consider,
1170 # would be suitable.
1176 # would be suitable.
1171 #
1177 #
1172 # It gives a chance to reuse a delta chain "unrelated" to the
1178 # It gives a chance to reuse a delta chain "unrelated" to the
1173 # current revision instead of starting our own. Without such
1179 # current revision instead of starting our own. Without such
1174 # re-use, topological branches would keep reopening new chains,
1180 # re-use, topological branches would keep reopening new chains,
1175 # creating more and more snapshots as the repository grows.
1181 # creating more and more snapshots as the repository grows.
1176
1182
1177 if floor is not None:
1183 if floor is not None:
1178 # We only do this for siblings created after the one in our
1184 # We only do this for siblings created after the one in our
1179 # parent's delta chain. Those created before have less
1185 # parent's delta chain. Those created before have less
1180 # chance to be a valid base since our ancestors had to
1186 # chance to be a valid base since our ancestors had to
1181 # create a new snapshot.
1187 # create a new snapshot.
1182 siblings = [r for r in siblings if floor < r]
1188 siblings = [r for r in siblings if floor < r]
1183 yield tuple(sorted(siblings))
1189 yield tuple(sorted(siblings))
1184 # then test the base from our parent's delta chain.
1190 # then test the base from our parent's delta chain.
1185 yield tuple(sorted(snaps))
1191 yield tuple(sorted(snaps))
1186 floor = min(snaps)
1192 floor = min(snaps)
1187 # No suitable base found in the parent chain; check whether any full
1193 # No suitable base found in the parent chain; check whether any full
1188 # snapshot emitted since the parent's base would be a suitable base
1194 # snapshot emitted since the parent's base would be a suitable base
1189 # for an intermediate snapshot.
1195 # for an intermediate snapshot.
1190 #
1196 #
1191 # It gives a chance to reuse a delta chain unrelated to the current
1197 # It gives a chance to reuse a delta chain unrelated to the current
1192 # revision instead of starting our own. Without such re-use,
1198 # revision instead of starting our own. Without such re-use,
1193 # topological branches would keep reopening new full chains,
1199 # topological branches would keep reopening new full chains,
1194 # creating more and more snapshots as the repository grows.
1200 # creating more and more snapshots as the repository grows.
1195 full = [
1201 full = [
1196 r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
1202 r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
1197 ]
1203 ]
1198 yield tuple(sorted(full))
1204 yield tuple(sorted(full))
1199
1205
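A toy illustration of the `parents_snaps` map built at the start of the method above, using hand-written chains instead of revlog data (all rev numbers invented):

import collections

# delta chains for two parents; leading entries are snapshots by construction
chain_p1 = [0, 7, 12]     # full snapshot 0, then intermediate snapshots
chain_p2 = [0, 9]
is_snapshot = {0, 7, 9, 12}.__contains__

parents_snaps = collections.defaultdict(set)
for chain in (chain_p1, chain_p2):
    for depth, rev in enumerate(chain):
        if not is_snapshot(rev):
            break
        parents_snaps[depth].add(rev)

assert parents_snaps == {0: {0}, 1: {7, 9}, 2: {12}}
snapfloor = min(parents_snaps[0]) + 1     # only consider revs above this
assert snapfloor == 1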
1200 def _iter_snapshots(self):
1206 def _iter_snapshots(self):
1201 assert self.revlog.delta_config.sparse_revlog
1207 assert self.revlog.delta_config.sparse_revlog
1202 self.current_stage = _STAGE_SNAPSHOT
1208 self.current_stage = _STAGE_SNAPSHOT
1203 good = None
1209 good = None
1204 groups = self._iter_snapshots_base()
1210 groups = self._iter_snapshots_base()
1205 for candidates in groups:
1211 for candidates in groups:
1206 good = yield candidates
1212 good = yield candidates
1207 if good is not None:
1213 if good is not None:
1208 break
1214 break
1209 # if we have a refinable value, try to refine it
1215 # if we have a refinable value, try to refine it
1210 if (
1216 if (
1211 good is not None
1217 good is not None
1212 and good not in (self.p1, self.p2)
1218 and good not in (self.p1, self.p2)
1213 and self.revlog.issnapshot(good)
1219 and self.revlog.issnapshot(good)
1214 ):
1220 ):
1215 assert self.current_stage == _STAGE_SNAPSHOT
1221 assert self.current_stage == _STAGE_SNAPSHOT
1216 # refine snapshot down
1222 # refine snapshot down
1217 previous = None
1223 previous = None
1218 while previous != good:
1224 while previous != good:
1219 previous = good
1225 previous = good
1220 base = self.revlog.deltaparent(good)
1226 base = self.revlog.deltaparent(good)
1221 if base == nullrev:
1227 if base == nullrev:
1222 break
1228 break
1223 good = yield (base,)
1229 good = yield (base,)
1224 # refine snapshot up
1230 # refine snapshot up
1225 if not self.snapshot_cache.snapshots:
1231 if not self.snapshot_cache.snapshots:
1226 self.snapshot_cache.update(self.revlog, good + 1)
1232 self.snapshot_cache.update(self.revlog, good + 1)
1227 previous = None
1233 previous = None
1228 while good != previous:
1234 while good != previous:
1229 previous = good
1235 previous = good
1230 children = tuple(
1236 children = tuple(
1231 sorted(c for c in self.snapshot_cache.snapshots[good])
1237 sorted(c for c in self.snapshot_cache.snapshots[good])
1232 )
1238 )
1233 good = yield children
1239 good = yield children
1234 yield None
1240 yield None
1235
1241
1236 def _iter_groups(self):
1242 def _iter_groups(self):
1237 good = None
1243 good = None
1238 for group in self._iter_parents():
1244 for group in self._iter_parents():
1239 good = yield group
1245 good = yield group
1240 if good is not None:
1246 if good is not None:
1241 break
1247 break
1242 else:
1248 else:
1243 assert good is None
1249 assert good is None
1244 assert self.revlog.delta_config.sparse_revlog
1250 assert self.revlog.delta_config.sparse_revlog
1245 # If sparse revlog is enabled, we can try to refine the
1251 # If sparse revlog is enabled, we can try to refine the
1246 # available deltas
1252 # available deltas
1247 iter_snap = self._iter_snapshots()
1253 iter_snap = self._iter_snapshots()
1248 group = iter_snap.send(None)
1254 group = iter_snap.send(None)
1249 while group is not None:
1255 while group is not None:
1250 good = yield group
1256 good = yield group
1251 group = iter_snap.send(good)
1257 group = iter_snap.send(good)
1252 yield None
1258 yield None
1253
1259
1254
1260
1255 class SnapshotCache:
1261 class SnapshotCache:
1256 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1262 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1257
1263
1258 def __init__(self):
1264 def __init__(self):
1259 self.snapshots = collections.defaultdict(set)
1265 self.snapshots = collections.defaultdict(set)
1260 self._start_rev = None
1266 self._start_rev = None
1261 self._end_rev = None
1267 self._end_rev = None
1262
1268
1263 def update(self, revlog, start_rev=0):
1269 def update(self, revlog, start_rev=0):
1264 """find snapshots from start_rev to tip"""
1270 """find snapshots from start_rev to tip"""
1265 nb_revs = len(revlog)
1271 nb_revs = len(revlog)
1266 end_rev = nb_revs - 1
1272 end_rev = nb_revs - 1
1267 if start_rev > end_rev:
1273 if start_rev > end_rev:
1268 return # range is empty
1274 return # range is empty
1269
1275
1270 if self._start_rev is None:
1276 if self._start_rev is None:
1271 assert self._end_rev is None
1277 assert self._end_rev is None
1272 self._update(revlog, start_rev, end_rev)
1278 self._update(revlog, start_rev, end_rev)
1273 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1279 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1274 if start_rev < self._start_rev:
1280 if start_rev < self._start_rev:
1275 self._update(revlog, start_rev, self._start_rev - 1)
1281 self._update(revlog, start_rev, self._start_rev - 1)
1276 if self._end_rev < end_rev:
1282 if self._end_rev < end_rev:
1277 self._update(revlog, self._end_rev + 1, end_rev)
1283 self._update(revlog, self._end_rev + 1, end_rev)
1278
1284
1279 if self._start_rev is None:
1285 if self._start_rev is None:
1280 assert self._end_rev is None
1286 assert self._end_rev is None
1281 self._end_rev = end_rev
1287 self._end_rev = end_rev
1282 self._start_rev = start_rev
1288 self._start_rev = start_rev
1283 else:
1289 else:
1284 self._start_rev = min(self._start_rev, start_rev)
1290 self._start_rev = min(self._start_rev, start_rev)
1285 self._end_rev = max(self._end_rev, end_rev)
1291 self._end_rev = max(self._end_rev, end_rev)
1286 assert self._start_rev <= self._end_rev, (
1292 assert self._start_rev <= self._end_rev, (
1287 self._start_rev,
1293 self._start_rev,
1288 self._end_rev,
1294 self._end_rev,
1289 )
1295 )
1290
1296
1291 def _update(self, revlog, start_rev, end_rev):
1297 def _update(self, revlog, start_rev, end_rev):
1292 """internal method that actually do update content"""
1298 """internal method that actually do update content"""
1293 assert self._start_rev is None or (
1299 assert self._start_rev is None or (
1294 start_rev < self._start_rev or start_rev > self._end_rev
1300 start_rev < self._start_rev or start_rev > self._end_rev
1295 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1301 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1296 assert self._start_rev is None or (
1302 assert self._start_rev is None or (
1297 end_rev < self._start_rev or end_rev > self._end_rev
1303 end_rev < self._start_rev or end_rev > self._end_rev
1298 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1304 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1299 cache = self.snapshots
1305 cache = self.snapshots
1300 if hasattr(revlog.index, 'findsnapshots'):
1306 if hasattr(revlog.index, 'findsnapshots'):
1301 revlog.index.findsnapshots(cache, start_rev, end_rev)
1307 revlog.index.findsnapshots(cache, start_rev, end_rev)
1302 else:
1308 else:
1303 deltaparent = revlog.deltaparent
1309 deltaparent = revlog.deltaparent
1304 issnapshot = revlog.issnapshot
1310 issnapshot = revlog.issnapshot
1305 for rev in revlog.revs(start_rev, end_rev):
1311 for rev in revlog.revs(start_rev, end_rev):
1306 if issnapshot(rev):
1312 if issnapshot(rev):
1307 cache[deltaparent(rev)].add(rev)
1313 cache[deltaparent(rev)].add(rev)
1308
1314
1309
1315
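The range bookkeeping in `update`/`_update` above only rescans the uncovered edges of the requested interval. A stand-in class, not the real cache, showing just that behaviour:

class RangeCache:
    """Tracks which [start, end] rev range has been scanned already."""
    def __init__(self):
        self._start = self._end = None

    def update(self, start, end):
        scanned = []
        if self._start is None:
            scanned.append((start, end))            # first scan: whole range
        else:
            if start < self._start:                 # extend to the left
                scanned.append((start, self._start - 1))
            if self._end < end:                     # extend to the right
                scanned.append((self._end + 1, end))
        self._start = start if self._start is None else min(self._start, start)
        self._end = end if self._end is None else max(self._end, end)
        return scanned

c = RangeCache()
assert c.update(5, 9) == [(5, 9)]              # initial scan
assert c.update(3, 11) == [(3, 4), (10, 11)]   # only the new edges rescanned
assert c.update(6, 8) == []                    # fully covered: nothing to do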
1310 class deltacomputer:
1316 class deltacomputer:
1311 """object capable of computing delta and finding delta for multiple revision
1317 """object capable of computing delta and finding delta for multiple revision
1312
1318
1313 This object is meant to compute and find multiple deltas applied to the same
1319 This object is meant to compute and find multiple deltas applied to the same
1314 revlog.
1320 revlog.
1315 """
1321 """
1316
1322
1317 def __init__(
1323 def __init__(
1318 self,
1324 self,
1319 revlog,
1325 revlog,
1320 write_debug=None,
1326 write_debug=None,
1321 debug_search=False,
1327 debug_search=False,
1322 debug_info=None,
1328 debug_info=None,
1323 ):
1329 ):
1324 self.revlog = revlog
1330 self.revlog = revlog
1325 self._write_debug = write_debug
1331 self._write_debug = write_debug
1326 if write_debug is None:
1332 if write_debug is None:
1327 self._debug_search = False
1333 self._debug_search = False
1328 else:
1334 else:
1329 self._debug_search = debug_search
1335 self._debug_search = debug_search
1330 self._debug_info = debug_info
1336 self._debug_info = debug_info
1331 self._snapshot_cache = SnapshotCache()
1337 self._snapshot_cache = SnapshotCache()
1332
1338
1333 @property
1339 @property
1334 def _gather_debug(self):
1340 def _gather_debug(self):
1335 return self._write_debug is not None or self._debug_info is not None
1341 return self._write_debug is not None or self._debug_info is not None
1336
1342
1337 def buildtext(self, revinfo):
1343 def buildtext(self, revinfo):
1338 """Builds a fulltext version of a revision
1344 """Builds a fulltext version of a revision
1339
1345
1340 revinfo: revisioninfo instance that contains all needed info
1346 revinfo: revisioninfo instance that contains all needed info
1341 """
1347 """
1342 btext = revinfo.btext
1348 btext = revinfo.btext
1343 if btext[0] is not None:
1349 if btext[0] is not None:
1344 return btext[0]
1350 return btext[0]
1345
1351
1346 revlog = self.revlog
1352 revlog = self.revlog
1347 cachedelta = revinfo.cachedelta
1353 cachedelta = revinfo.cachedelta
1348 baserev = cachedelta[0]
1354 baserev = cachedelta[0]
1349 delta = cachedelta[1]
1355 delta = cachedelta[1]
1350
1356
1351 fulltext = btext[0] = _textfromdelta(
1357 fulltext = btext[0] = _textfromdelta(
1352 revlog,
1358 revlog,
1353 baserev,
1359 baserev,
1354 delta,
1360 delta,
1355 revinfo.p1,
1361 revinfo.p1,
1356 revinfo.p2,
1362 revinfo.p2,
1357 revinfo.flags,
1363 revinfo.flags,
1358 revinfo.node,
1364 revinfo.node,
1359 )
1365 )
1360 return fulltext
1366 return fulltext
1361
1367
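`buildtext` reconstructs a fulltext by applying the cached delta to its base (via `_textfromdelta`, not shown here). A conceptual toy of that operation, using explicit (start, end, replacement) operations instead of mdiff's binary patch format:

def apply_delta(base: bytes, ops):
    out, pos = [], 0
    for start, end, data in ops:        # ops must be sorted, non-overlapping
        out.append(base[pos:start])     # copy unchanged span
        out.append(data)                # insert replacement bytes
        pos = end
    out.append(base[pos:])
    return b''.join(out)

base = b'line one\nline two\nline three\n'
ops = [(9, 17, b'line 2')]              # replace "line two" with "line 2"
assert apply_delta(base, ops) == b'line one\nline 2\nline three\n'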
1362 def _builddeltadiff(self, base, revinfo):
1368 def _builddeltadiff(self, base, revinfo):
1363 revlog = self.revlog
1369 revlog = self.revlog
1364 t = self.buildtext(revinfo)
1370 t = self.buildtext(revinfo)
1365 if revlog.iscensored(base):
1371 if revlog.iscensored(base):
1366 # deltas based on a censored revision must replace the
1372 # deltas based on a censored revision must replace the
1367 # full content in one patch, so delta works everywhere
1373 # full content in one patch, so delta works everywhere
1368 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1374 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1369 delta = header + t
1375 delta = header + t
1370 else:
1376 else:
1371 ptext = revlog.rawdata(base)
1377 ptext = revlog.rawdata(base)
1372 delta = mdiff.textdiff(ptext, t)
1378 delta = mdiff.textdiff(ptext, t)
1373
1379
1374 return delta
1380 return delta
1375
1381
1376 def _builddeltainfo(
1382 def _builddeltainfo(
1377 self, revinfo, base, target_rev=None, as_snapshot=False
1383 self, revinfo, base, target_rev=None, as_snapshot=False
1378 ):
1384 ):
1379 # can we use the cached delta?
1385 # can we use the cached delta?
1380 revlog = self.revlog
1386 revlog = self.revlog
1381 chainbase = revlog.chainbase(base)
1387 chainbase = revlog.chainbase(base)
1382 if revlog.delta_config.general_delta:
1388 if revlog.delta_config.general_delta:
1383 deltabase = base
1389 deltabase = base
1384 else:
1390 else:
1385 if target_rev is not None and base != target_rev - 1:
1391 if target_rev is not None and base != target_rev - 1:
1386 msg = (
1392 msg = (
1387 b'general delta cannot use delta for something else '
1393 b'general delta cannot use delta for something else '
1388 b'than `prev`: %d<-%d'
1394 b'than `prev`: %d<-%d'
1389 )
1395 )
1390 msg %= (base, target_rev)
1396 msg %= (base, target_rev)
1391 raise error.ProgrammingError(msg)
1397 raise error.ProgrammingError(msg)
1392 deltabase = chainbase
1398 deltabase = chainbase
1393 snapshotdepth = None
1399 snapshotdepth = None
1394 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1400 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1395 snapshotdepth = 0
1401 snapshotdepth = 0
1396 elif revlog.delta_config.sparse_revlog and as_snapshot:
1402 elif revlog.delta_config.sparse_revlog and as_snapshot:
1397 assert revlog.issnapshot(deltabase)
1403 assert revlog.issnapshot(deltabase)
1398 # A delta chain should always be one full snapshot,
1404 # A delta chain should always be one full snapshot,
1399 # zero or more semi-snapshots, and zero or more deltas
1405 # zero or more semi-snapshots, and zero or more deltas
1400 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1406 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1401 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1407 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1402 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1408 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1403 delta = None
1409 delta = None
1404 if revinfo.cachedelta:
1410 if revinfo.cachedelta:
1405 cachebase = revinfo.cachedelta[0]
1411 cachebase = revinfo.cachedelta[0]
1406 # check if the diff still applies
1412 # check if the diff still applies
1407 currentbase = cachebase
1413 currentbase = cachebase
1408 while (
1414 while (
1409 currentbase != nullrev
1415 currentbase != nullrev
1410 and currentbase != base
1416 and currentbase != base
1411 and self.revlog.length(currentbase) == 0
1417 and self.revlog.length(currentbase) == 0
1412 ):
1418 ):
1413 currentbase = self.revlog.deltaparent(currentbase)
1419 currentbase = self.revlog.deltaparent(currentbase)
1414 if self.revlog.delta_config.lazy_delta and currentbase == base:
1420 if self.revlog.delta_config.lazy_delta and currentbase == base:
1415 delta = revinfo.cachedelta[1]
1421 delta = revinfo.cachedelta[1]
1416 if delta is None:
1422 if delta is None:
1417 delta = self._builddeltadiff(base, revinfo)
1423 delta = self._builddeltadiff(base, revinfo)
1418 if self._debug_search:
1424 if self._debug_search:
1419 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1425 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1420 msg %= len(delta)
1426 msg %= len(delta)
1421 self._write_debug(msg)
1427 self._write_debug(msg)
1422 # snapshotdepth needs to be neither None nor a 0-level snapshot
1428 # snapshotdepth needs to be neither None nor a 0-level snapshot
1423 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1429 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1424 lowestrealisticdeltalen = (
1430 lowestrealisticdeltalen = (
1425 len(delta) // revlog.delta_config.upper_bound_comp
1431 len(delta) // revlog.delta_config.upper_bound_comp
1426 )
1432 )
1427 snapshotlimit = revinfo.textlen >> snapshotdepth
1433 snapshotlimit = revinfo.textlen >> snapshotdepth
1428 if self._debug_search:
1434 if self._debug_search:
1429 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1435 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1430 msg %= lowestrealisticdeltalen
1436 msg %= lowestrealisticdeltalen
1431 self._write_debug(msg)
1437 self._write_debug(msg)
1432 if snapshotlimit < lowestrealisticdeltalen:
1438 if snapshotlimit < lowestrealisticdeltalen:
1433 if self._debug_search:
1439 if self._debug_search:
1434 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1440 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1435 self._write_debug(msg)
1441 self._write_debug(msg)
1436 return None
1442 return None
1437 if revlog.length(base) < lowestrealisticdeltalen:
1443 if revlog.length(base) < lowestrealisticdeltalen:
1438 if self._debug_search:
1444 if self._debug_search:
1439 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1445 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1440 self._write_debug(msg)
1446 self._write_debug(msg)
1441 return None
1447 return None
1442 header, data = revlog._inner.compress(delta)
1448 header, data = revlog._inner.compress(delta)
1443 deltalen = len(header) + len(data)
1449 deltalen = len(header) + len(data)
1444 offset = revlog.end(len(revlog) - 1)
1450 offset = revlog.end(len(revlog) - 1)
1445 dist = deltalen + offset - revlog.start(chainbase)
1451 dist = deltalen + offset - revlog.start(chainbase)
1446 chainlen, compresseddeltalen = revlog._chaininfo(base)
1452 chainlen, compresseddeltalen = revlog._chaininfo(base)
1447 chainlen += 1
1453 chainlen += 1
1448 compresseddeltalen += deltalen
1454 compresseddeltalen += deltalen
1449
1455
1450 return _deltainfo(
1456 return _deltainfo(
1451 dist,
1457 dist,
1452 deltalen,
1458 deltalen,
1453 (header, data),
1459 (header, data),
1454 deltabase,
1460 deltabase,
1455 chainbase,
1461 chainbase,
1456 chainlen,
1462 chainlen,
1457 compresseddeltalen,
1463 compresseddeltalen,
1458 snapshotdepth,
1464 snapshotdepth,
1459 )
1465 )
1460
1466
1461 def _fullsnapshotinfo(self, revinfo, curr):
1467 def _fullsnapshotinfo(self, revinfo, curr):
1462 rawtext = self.buildtext(revinfo)
1468 rawtext = self.buildtext(revinfo)
1463 data = self.revlog._inner.compress(rawtext)
1469 data = self.revlog._inner.compress(rawtext)
1464 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1470 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1465 deltabase = chainbase = curr
1471 deltabase = chainbase = curr
1466 snapshotdepth = 0
1472 snapshotdepth = 0
1467 chainlen = 1
1473 chainlen = 1
1468
1474
1469 return _deltainfo(
1475 return _deltainfo(
1470 dist,
1476 dist,
1471 deltalen,
1477 deltalen,
1472 data,
1478 data,
1473 deltabase,
1479 deltabase,
1474 chainbase,
1480 chainbase,
1475 chainlen,
1481 chainlen,
1476 compresseddeltalen,
1482 compresseddeltalen,
1477 snapshotdepth,
1483 snapshotdepth,
1478 )
1484 )
1479
1485
1480 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1486 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1481 """Find an acceptable delta against a candidate revision
1487 """Find an acceptable delta against a candidate revision
1482
1488
1483 revinfo: information about the revision (instance of _revisioninfo)
1489 revinfo: information about the revision (instance of _revisioninfo)
1484
1490
1485 Returns the first acceptable candidate revision, as ordered by
1491 Returns the first acceptable candidate revision, as ordered by
1486 _candidategroups
1492 _candidategroups
1487
1493
1488 If no suitable deltabase is found, we return delta info for a full
1494 If no suitable deltabase is found, we return delta info for a full
1489 snapshot.
1495 snapshot.
1490
1496
1491 `excluded_bases` is an optional set of revisions that cannot be used as
1497 `excluded_bases` is an optional set of revisions that cannot be used as
1492 a delta base. Use this to recompute deltas suitable in censor or strip
1498 a delta base. Use this to recompute deltas suitable in censor or strip
1493 contexts.
1499 contexts.
1494 """
1500 """
1495 if target_rev is None:
1501 if target_rev is None:
1496 target_rev = len(self.revlog)
1502 target_rev = len(self.revlog)
1497
1503
1498 gather_debug = self._gather_debug
1504 gather_debug = self._gather_debug
1499 cachedelta = revinfo.cachedelta
1505 cachedelta = revinfo.cachedelta
1500 revlog = self.revlog
1506 revlog = self.revlog
1501 p1r = p2r = None
1507 p1r = p2r = None
1502
1508
1503 if excluded_bases is None:
1509 if excluded_bases is None:
1504 excluded_bases = set()
1510 excluded_bases = set()
1505
1511
1506 if gather_debug:
1512 if gather_debug:
1507 start = util.timer()
1513 start = util.timer()
1508 dbg = self._one_dbg_data()
1514 dbg = self._one_dbg_data()
1509 dbg['revision'] = target_rev
1515 dbg['revision'] = target_rev
1510 p1r = revlog.rev(revinfo.p1)
1516 p1r = revlog.rev(revinfo.p1)
1511 p2r = revlog.rev(revinfo.p2)
1517 p2r = revlog.rev(revinfo.p2)
1512 if p1r != nullrev:
1518 if p1r != nullrev:
1513 p1_chain_len = revlog._chaininfo(p1r)[0]
1519 p1_chain_len = revlog._chaininfo(p1r)[0]
1514 else:
1520 else:
1515 p1_chain_len = -1
1521 p1_chain_len = -1
1516 if p2r != nullrev:
1522 if p2r != nullrev:
1517 p2_chain_len = revlog._chaininfo(p2r)[0]
1523 p2_chain_len = revlog._chaininfo(p2r)[0]
1518 else:
1524 else:
1519 p2_chain_len = -1
1525 p2_chain_len = -1
1520 dbg['p1-chain-len'] = p1_chain_len
1526 dbg['p1-chain-len'] = p1_chain_len
1521 dbg['p2-chain-len'] = p2_chain_len
1527 dbg['p2-chain-len'] = p2_chain_len
1522
1528
1523 # 1) if the revision is empty, no amount of delta can beat it
1529 # 1) if the revision is empty, no amount of delta can beat it
1524 #
1530 #
1525 # 2) no delta for flag processor revision (see "candelta" for why)
1531 # 2) no delta for flag processor revision (see "candelta" for why)
1526 # not calling candelta since only one revision needs testing, also to
1532 # not calling candelta since only one revision needs testing, also to
1527 # avoid the overhead of fetching flags again.
1533 # avoid the overhead of fetching flags again.
1528 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1534 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1529 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1535 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1530 if gather_debug:
1536 if gather_debug:
1531 end = util.timer()
1537 end = util.timer()
1532 dbg['duration'] = end - start
1538 dbg['duration'] = end - start
1533 dbg[
1539 dbg[
1534 'delta-base'
1540 'delta-base'
1535 ] = deltainfo.base # pytype: disable=attribute-error
1541 ] = deltainfo.base # pytype: disable=attribute-error
1536 dbg['search_round_count'] = 0
1542 dbg['search_round_count'] = 0
1537 dbg['using-cached-base'] = False
1543 dbg['using-cached-base'] = False
1538 dbg['delta_try_count'] = 0
1544 dbg['delta_try_count'] = 0
1539 dbg['type'] = b"full"
1545 dbg['type'] = b"full"
1540 dbg['snapshot-depth'] = 0
1546 dbg['snapshot-depth'] = 0
1541 self._dbg_process_data(dbg)
1547 self._dbg_process_data(dbg)
1542 return deltainfo
1548 return deltainfo
1543
1549
1544 deltainfo = None
1550 deltainfo = None
1545
1551
1546 # If this source delta is to be forcibly reused, let us comply early.
1552 # If this source delta is to be forcibly reused, let us comply early.
1547 if (
1553 if (
1548 revlog.delta_config.general_delta
1554 revlog.delta_config.general_delta
1549 and revinfo.cachedelta is not None
1555 and revinfo.cachedelta is not None
1550 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1556 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1551 ):
1557 ):
1552 base = revinfo.cachedelta[0]
1558 base = revinfo.cachedelta[0]
1553 if base == nullrev:
1559 if base == nullrev:
1554 dbg_type = b"full"
1560 dbg_type = b"full"
1555 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1561 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1556 if gather_debug:
1562 if gather_debug:
1557 snapshotdepth = 0
1563 snapshotdepth = 0
1558 elif base not in excluded_bases:
1564 elif base not in excluded_bases:
1559 delta = revinfo.cachedelta[1]
1565 delta = revinfo.cachedelta[1]
1560 header, data = revlog.compress(delta)
1566 header, data = revlog.compress(delta)
1561 deltalen = len(header) + len(data)
1567 deltalen = len(header) + len(data)
1562 if gather_debug:
1568 if gather_debug:
1563 offset = revlog.end(len(revlog) - 1)
1569 offset = revlog.end(len(revlog) - 1)
1564 chainbase = revlog.chainbase(base)
1570 chainbase = revlog.chainbase(base)
1565 distance = deltalen + offset - revlog.start(chainbase)
1571 distance = deltalen + offset - revlog.start(chainbase)
1566 chainlen, compresseddeltalen = revlog._chaininfo(base)
1572 chainlen, compresseddeltalen = revlog._chaininfo(base)
1567 chainlen += 1
1573 chainlen += 1
1568 compresseddeltalen += deltalen
1574 compresseddeltalen += deltalen
1569 if base == p1r or base == p2r:
1575 if base == p1r or base == p2r:
1570 dbg_type = b"delta"
1576 dbg_type = b"delta"
1571 snapshotdepth = None
1577 snapshotdepth = None
1572 elif not revlog.issnapshot(base):
1578 elif not revlog.issnapshot(base):
1573 snapshotdepth = None
1579 snapshotdepth = None
1574 else:
1580 else:
1575 dbg_type = b"snapshot"
1581 dbg_type = b"snapshot"
1576 snapshotdepth = revlog.snapshotdepth(base) + 1
1582 snapshotdepth = revlog.snapshotdepth(base) + 1
1577 else:
1583 else:
1578 distance = None
1584 distance = None
1579 chainbase = None
1585 chainbase = None
1580 chainlen = None
1586 chainlen = None
1581 compresseddeltalen = None
1587 compresseddeltalen = None
1582 snapshotdepth = None
1588 snapshotdepth = None
1583 deltainfo = _deltainfo(
1589 deltainfo = _deltainfo(
1584 distance=distance,
1590 distance=distance,
1585 deltalen=deltalen,
1591 deltalen=deltalen,
1586 data=(header, data),
1592 data=(header, data),
1587 base=base,
1593 base=base,
1588 chainbase=chainbase,
1594 chainbase=chainbase,
1589 chainlen=chainlen,
1595 chainlen=chainlen,
1590 compresseddeltalen=compresseddeltalen,
1596 compresseddeltalen=compresseddeltalen,
1591 snapshotdepth=snapshotdepth,
1597 snapshotdepth=snapshotdepth,
1592 )
1598 )
1593
1599
1594 if deltainfo is not None:
1600 if deltainfo is not None:
1595 if gather_debug:
1601 if gather_debug:
1596 end = util.timer()
1602 end = util.timer()
1597 dbg['duration'] = end - start
1603 dbg['duration'] = end - start
1598 dbg[
1604 dbg[
1599 'delta-base'
1605 'delta-base'
1600 ] = deltainfo.base # pytype: disable=attribute-error
1606 ] = deltainfo.base # pytype: disable=attribute-error
1601 dbg['search_round_count'] = 0
1607 dbg['search_round_count'] = 0
1602 dbg['using-cached-base'] = True
1608 dbg['using-cached-base'] = True
1603 dbg['delta_try_count'] = 0
1609 dbg['delta_try_count'] = 0
1604 dbg['type'] = b"full"
1610 dbg['type'] = b"full"
1605 if snapshotdepth is None:
1611 if snapshotdepth is None:
1606 dbg['snapshot-depth'] = -1
1612 dbg['snapshot-depth'] = -1
1607 else:
1613 else:
1608 dbg['snapshot-depth'] = snapshotdepth
1614 dbg['snapshot-depth'] = snapshotdepth
1609 self._dbg_process_data(dbg)
1615 self._dbg_process_data(dbg)
1610 return deltainfo
1616 return deltainfo
1611
1617
1612 # count the number of different delta we tried (for debug purpose)
1618 # count the number of different delta we tried (for debug purpose)
1613 dbg_try_count = 0
1619 dbg_try_count = 0
1614 # count the number of "search round" we did. (for debug purpose)
1620 # count the number of "search round" we did. (for debug purpose)
1615 dbg_try_rounds = 0
1621 dbg_try_rounds = 0
1616 dbg_type = b'unknown'
1622 dbg_type = b'unknown'
1617
1623
1618 if p1r is None:
1624 if p1r is None:
1619 p1r = revlog.rev(revinfo.p1)
1625 p1r = revlog.rev(revinfo.p1)
1620 p2r = revlog.rev(revinfo.p2)
1626 p2r = revlog.rev(revinfo.p2)
1621
1627
1622 if self._debug_search:
1628 if self._debug_search:
1623 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1629 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1624 msg %= target_rev
1630 msg %= target_rev
1625 self._write_debug(msg)
1631 self._write_debug(msg)
1626
1632
1627 # should we try to build a delta?
1633 # should we try to build a delta?
1628 if not (len(self.revlog) and self.revlog._storedeltachains):
1634 if not (len(self.revlog) and self.revlog._storedeltachains):
1629 search_cls = _NoDeltaSearch
1635 search_cls = _NoDeltaSearch
1630 elif self.revlog.delta_config.sparse_revlog:
1636 elif self.revlog.delta_config.sparse_revlog:
1631 search_cls = _SparseDeltaSearch
1637 search_cls = _SparseDeltaSearch
1632 elif self.revlog.delta_config.general_delta:
1638 elif self.revlog.delta_config.general_delta:
1633 search_cls = _GeneralDeltaSearch
1639 search_cls = _GeneralDeltaSearch
1634 else:
1640 else:
1635 # before general delta, there is only one possible delta base
1641 # before general delta, there is only one possible delta base
1636 search_cls = _PrevDeltaSearch
1642 search_cls = _PrevDeltaSearch
1637
1643
1638 search = search_cls(
1644 search = search_cls(
1639 self.revlog,
1645 self.revlog,
1640 revinfo,
1646 revinfo,
1641 p1r,
1647 p1r,
1642 p2r,
1648 p2r,
1643 cachedelta,
1649 cachedelta,
1644 excluded_bases,
1650 excluded_bases,
1645 target_rev,
1651 target_rev,
1646 snapshot_cache=self._snapshot_cache,
1652 snapshot_cache=self._snapshot_cache,
1647 )
1653 )
1648
1654
1649 while not search.done:
1655 while not search.done:
1650 current_group = search.current_group
1656 current_group = search.current_group
1651 # current_group can be `None`, but not when search.done is False.
1657 # current_group can be `None`, but not when search.done is False.
1652 # We add this assert to help pytype
1658 # We add this assert to help pytype
1653 assert current_group is not None
1659 assert current_group is not None
1654 candidaterevs = current_group
1660 candidaterevs = current_group
1655 dbg_try_rounds += 1
1661 dbg_try_rounds += 1
1656 if self._debug_search:
1662 if self._debug_search:
1657 prev = None
1663 prev = None
1658 if deltainfo is not None:
1664 if deltainfo is not None:
1659 prev = deltainfo.base
1665 prev = deltainfo.base
1660
1666
1661 if (
1667 if (
1662 cachedelta is not None
1668 cachedelta is not None
1663 and len(candidaterevs) == 1
1669 and len(candidaterevs) == 1
1664 and cachedelta[0] in candidaterevs
1670 and cachedelta[0] in candidaterevs
1665 ):
1671 ):
1666 round_type = b"cached-delta"
1672 round_type = b"cached-delta"
1667 elif p1r in candidaterevs or p2r in candidaterevs:
1673 elif p1r in candidaterevs or p2r in candidaterevs:
1668 round_type = b"parents"
1674 round_type = b"parents"
1669 elif prev is not None and all(c < prev for c in candidaterevs):
1675 elif prev is not None and all(c < prev for c in candidaterevs):
1670 round_type = b"refine-down"
1676 round_type = b"refine-down"
1671 elif prev is not None and all(c > prev for c in candidaterevs):
1677 elif prev is not None and all(c > prev for c in candidaterevs):
1672 round_type = b"refine-up"
1678 round_type = b"refine-up"
1673 else:
1679 else:
1674 round_type = b"search-down"
1680 round_type = b"search-down"
1675 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1681 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1676 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1682 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1677 self._write_debug(msg)
1683 self._write_debug(msg)
1678 nominateddeltas = []
1684 nominateddeltas = []
1679 if deltainfo is not None:
1685 if deltainfo is not None:
1680 if self._debug_search:
1686 if self._debug_search:
1681 msg = (
1687 msg = (
1682 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1688 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1683 )
1689 )
1684 msg %= (deltainfo.base, deltainfo.deltalen)
1690 msg %= (deltainfo.base, deltainfo.deltalen)
1685 self._write_debug(msg)
1691 self._write_debug(msg)
1686 # if we already found a good delta,
1692 # if we already found a good delta,
1687 # challenge it against refined candidates
1693 # challenge it against refined candidates
1688 nominateddeltas.append(deltainfo)
1694 nominateddeltas.append(deltainfo)
1689 for candidaterev in candidaterevs:
1695 for candidaterev in candidaterevs:
1690 if self._debug_search:
1696 if self._debug_search:
1691 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1697 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1692 msg %= candidaterev
1698 msg %= candidaterev
1693 self._write_debug(msg)
1699 self._write_debug(msg)
1694 candidate_type = None
1700 candidate_type = None
1695 if candidaterev == p1r:
1701 if candidaterev == p1r:
1696 candidate_type = b"p1"
1702 candidate_type = b"p1"
1697 elif candidaterev == p2r:
1703 elif candidaterev == p2r:
1698 candidate_type = b"p2"
1704 candidate_type = b"p2"
1699 elif self.revlog.issnapshot(candidaterev):
1705 elif self.revlog.issnapshot(candidaterev):
1700 candidate_type = b"snapshot-%d"
1706 candidate_type = b"snapshot-%d"
1701 candidate_type %= self.revlog.snapshotdepth(
1707 candidate_type %= self.revlog.snapshotdepth(
1702 candidaterev
1708 candidaterev
1703 )
1709 )
1704
1710
1705 if candidate_type is not None:
1711 if candidate_type is not None:
1706 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1712 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1707 msg %= candidate_type
1713 msg %= candidate_type
1708 self._write_debug(msg)
1714 self._write_debug(msg)
1709 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1715 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1710 msg %= self.revlog.length(candidaterev)
1716 msg %= self.revlog.length(candidaterev)
1711 self._write_debug(msg)
1717 self._write_debug(msg)
1712 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1718 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1713 msg %= self.revlog.deltaparent(candidaterev)
1719 msg %= self.revlog.deltaparent(candidaterev)
1714 self._write_debug(msg)
1720 self._write_debug(msg)
1715
1721
1716 dbg_try_count += 1
1722 dbg_try_count += 1
1717
1723
1718 if self._debug_search:
1724 if self._debug_search:
1719 delta_start = util.timer()
1725 delta_start = util.timer()
1720 candidatedelta = self._builddeltainfo(
1726 candidatedelta = self._builddeltainfo(
1721 revinfo,
1727 revinfo,
1722 candidaterev,
1728 candidaterev,
1723 target_rev=target_rev,
1729 target_rev=target_rev,
1724 as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
1730 as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
1725 )
1731 )
1726 if self._debug_search:
1732 if self._debug_search:
1727 delta_end = util.timer()
1733 delta_end = util.timer()
1728 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1734 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1729 msg %= delta_end - delta_start
1735 msg %= delta_end - delta_start
1730 self._write_debug(msg)
1736 self._write_debug(msg)
1731 if candidatedelta is not None:
1737 if candidatedelta is not None:
1732 if search.is_good_delta_info(candidatedelta):
1738 if search.is_good_delta_info(candidatedelta):
1733 if self._debug_search:
1739 if self._debug_search:
1734 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1740 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1735 msg %= candidatedelta.deltalen
1741 msg %= candidatedelta.deltalen
1736 self._write_debug(msg)
1742 self._write_debug(msg)
1737 nominateddeltas.append(candidatedelta)
1743 nominateddeltas.append(candidatedelta)
1738 elif self._debug_search:
1744 elif self._debug_search:
1739 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1745 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1740 msg %= candidatedelta.deltalen
1746 msg %= candidatedelta.deltalen
1741 self._write_debug(msg)
1747 self._write_debug(msg)
1742 elif self._debug_search:
1748 elif self._debug_search:
1743 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1749 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1744 self._write_debug(msg)
1750 self._write_debug(msg)
1745 if nominateddeltas:
1751 if nominateddeltas:
1746 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1752 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1747 search.next_group(deltainfo)
1753 search.next_group(deltainfo)
1748
1754
        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg['duration'] = end - start
            dbg[
                'delta-base'
            ] = deltainfo.base  # pytype: disable=attribute-error
            dbg['search_round_count'] = dbg_try_rounds
            dbg['using-cached-base'] = used_cached
            dbg['delta_try_count'] = dbg_try_count
            dbg['type'] = dbg_type
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = -1
            self._dbg_process_data(dbg)
        return deltainfo
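
    # Worked illustration of the used_cached flag above (numbers are
    # hypothetical): if a client-supplied cachedelta against revision 41 is
    # accepted on the very first attempt, the search performed exactly one
    # round with one candidate tried and deltainfo.base == 41, so the result
    # is flagged as cached.  A cached *full* snapshot instead shows up as
    # zero rounds and zero tries with cachedelta[0] == nullrev.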
    def _one_dbg_data(self):
        dbg = {
            'duration': None,
            'revision': None,
            'delta-base': None,
            'search_round_count': None,
            'using-cached-base': None,
            'delta_try_count': None,
            'type': None,
            'p1-chain-len': None,
            'p2-chain-len': None,
            'snapshot-depth': None,
            'target-revlog': None,
        }
        target_revlog = b"UNKNOWN"
        target_type = self.revlog.target[0]
        target_key = self.revlog.target[1]
        if target_type == KIND_CHANGELOG:
            target_revlog = b'CHANGELOG:'
        elif target_type == KIND_MANIFESTLOG:
            target_revlog = b'MANIFESTLOG:'
            if target_key:
                target_revlog += b'%s:' % target_key
        elif target_type == KIND_FILELOG:
            target_revlog = b'FILELOG:'
            if target_key:
                target_revlog += b'%s:' % target_key
        dbg['target-revlog'] = target_revlog
        return dbg
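
    # For illustration (hypothetical path): a filelog for foo.txt reports
    # b'FILELOG:foo.txt:', a manifest log reports b'MANIFESTLOG:' optionally
    # followed by its key when one is set, the changelog reports
    # b'CHANGELOG:', and anything else falls back to b'UNKNOWN'.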
    def _dbg_process_data(self, dbg):
        if self._debug_info is not None:
            self._debug_info.append(dbg)

        if self._write_debug is not None:
            msg = (
                b"DBG-DELTAS:"
                b" %-12s"
                b" rev=%d:"
                b" delta-base=%d"
                b" is-cached=%d"
                b" - search-rounds=%d"
                b" try-count=%d"
                b" - delta-type=%-6s"
                b" snap-depth=%d"
                b" - p1-chain-length=%d"
                b" p2-chain-length=%d"
                b" - duration=%f"
                b"\n"
            )
            msg %= (
                dbg["target-revlog"],
                dbg["revision"],
                dbg["delta-base"],
                dbg["using-cached-base"],
                dbg["search_round_count"],
                dbg["delta_try_count"],
                dbg["type"],
                dbg["snapshot-depth"],
                dbg["p1-chain-len"],
                dbg["p2-chain-len"],
                dbg["duration"],
            )
            self._write_debug(msg)
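
    # For illustration, with hypothetical values the format above renders as
    # a single line (wrapped here for readability):
    #
    #   DBG-DELTAS: FILELOG:foo.txt: rev=4: delta-base=3 is-cached=0
    #     - search-rounds=1 try-count=1 - delta-type=delta  snap-depth=-1
    #     - p1-chain-length=2 p2-chain-length=-1 - duration=0.000542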


def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare it uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
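

# A minimal usage sketch of the dispatch above (the b'x' zlib header and the
# deltainfo contents are assumptions for illustration; real callers pass the
# revlog's configured default engine header):
#
#   mode, deltainfo = delta_compression(b'x', deltainfo)
#   # body starting with the engine header (b'x') -> COMP_MODE_DEFAULT
#   # body starting with b'\0'                    -> COMP_MODE_PLAIN
#   # header b'u' (explicitly uncompressed)       -> COMP_MODE_PLAIN,
#   #                                                b'u' header dropped
#   # anything else                               -> COMP_MODE_INLINE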