##// END OF EJS Templates
delta-find: split the "sparse" part of `_pre_filter_rev` in a method...
marmoute -
r52232:9a1239c3 default
parent child Browse files
Show More
@@ -1,1722 +1,1734 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """Minimalist fake revlog used by the doctests in this module."""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both nullrev and the first revision start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev is always treated as a snapshot
        return rev == nullrev or rev in self._snapshot
83
83
84
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # the index has no native implementation; fall back to the pure
        # Python one defined in this module
        def densityslicing(revs, density, mingap):
            return _slicechunktodensity(revlog, revs, density, mingap)

    density_chunks = densityslicing(
        revs,
        revlog.data_config.sr_density_threshold,
        revlog.data_config.sr_min_gap_size,
    )
    # each density-selected chunk may still exceed targetsize; slice further
    for chunk in density_chunks:
        yield from _slicechunktosize(revlog, chunk, targetsize)
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # fast path: the whole chunk already fits within the target size
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    # (grow the current chunk while revisions are snapshots and still fit;
    # flush the accumulated chunk as soon as one does not)
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            # restart accumulation from the current revision
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # first non-snapshot revision reached; hand over to phase two
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the remaining window until the candidate chunk fits
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be sliced further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # a span below the minimum gap size is never worth splitting
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the (size, index) of the gaps between consecutive non-empty
    # revisions; the list is later sorted so the largest gaps pop first
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        # pop from the end of the (ascending) sorted list: largest gap first
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read,
        # raising the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final slice after the last selected cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans nothing
    if not revs:
        return 0
    first_byte = revlog.start(revs[0])
    last_byte = revlog.end(revs[-1])
    return last_byte - first_byte
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    Applies ``delta`` on top of revision ``baserev`` of ``revlog``, runs the
    raw flag processors and, when they request it, validates the result
    against ``expectednode`` using parents ``p1``/``p2``.

    Raises error.StorageError when the REVIDX_ISCENSORED flag is set but the
    content validated as non-censored, and re-raises CensoredNodeError when
    censored content is built without that flag. Returns the full text.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full-replacement delta: the payload after the header is the text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # reaching this point with the censored flag set means the
            # checks above did not detect censorship: the flag is wrong
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
557 @attr.s(slots=True, frozen=True)
557 @attr.s(slots=True, frozen=True)
558 class _deltainfo:
558 class _deltainfo:
559 distance = attr.ib()
559 distance = attr.ib()
560 deltalen = attr.ib()
560 deltalen = attr.ib()
561 data = attr.ib()
561 data = attr.ib()
562 base = attr.ib()
562 base = attr.ib()
563 chainbase = attr.ib()
563 chainbase = attr.ib()
564 chainlen = attr.ib()
564 chainlen = attr.ib()
565 compresseddeltalen = attr.ib()
565 compresseddeltalen = attr.ib()
566 snapshotdepth = attr.ib()
566 snapshotdepth = attr.ib()
567
567
568
568
569 def drop_u_compression(delta):
569 def drop_u_compression(delta):
570 """turn into a "u" (no-compression) into no-compression without header
570 """turn into a "u" (no-compression) into no-compression without header
571
571
572 This is useful for revlog format that has better compression method.
572 This is useful for revlog format that has better compression method.
573 """
573 """
574 assert delta.data[0] == b'u', delta.data[0]
574 assert delta.data[0] == b'u', delta.data[0]
575 return _deltainfo(
575 return _deltainfo(
576 delta.distance,
576 delta.distance,
577 delta.deltalen - 1,
577 delta.deltalen - 1,
578 (b'', delta.data[1]),
578 (b'', delta.data[1]),
579 delta.base,
579 delta.base,
580 delta.chainbase,
580 delta.chainbase,
581 delta.chainlen,
581 delta.chainlen,
582 delta.compresseddeltalen,
582 delta.compresseddeltalen,
583 delta.snapshotdepth,
583 delta.snapshotdepth,
584 )
584 )
585
585
586
586
587 # If a revision's full text is that much bigger than a base candidate full
587 # If a revision's full text is that much bigger than a base candidate full
588 # text's, it is very unlikely that it will produce a valid delta. We no longer
588 # text's, it is very unlikely that it will produce a valid delta. We no longer
589 # consider these candidates.
589 # consider these candidates.
590 LIMIT_BASE2TEXT = 500
590 LIMIT_BASE2TEXT = 500
591
591
592
592
593 class _DeltaSearch:
593 class _DeltaSearch:
594 """perform the search of a good delta for a single revlog revision
594 """perform the search of a good delta for a single revlog revision
595
595
596 note: some of the deltacomputer.finddeltainfo logic should probably move
596 note: some of the deltacomputer.finddeltainfo logic should probably move
597 here.
597 here.
598 """
598 """
599
599
600 def __init__(
600 def __init__(
601 self,
601 self,
602 revlog,
602 revlog,
603 revinfo,
603 revinfo,
604 p1,
604 p1,
605 p2,
605 p2,
606 cachedelta,
606 cachedelta,
607 excluded_bases=None,
607 excluded_bases=None,
608 target_rev=None,
608 target_rev=None,
609 snapshot_cache=None,
609 snapshot_cache=None,
610 ):
610 ):
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
612 # so we should never end up asking such question. Adding the assert as
612 # so we should never end up asking such question. Adding the assert as
613 # a safe-guard to detect anything that would be fishy in this regard.
613 # a safe-guard to detect anything that would be fishy in this regard.
614 assert (
614 assert (
615 cachedelta is None
615 cachedelta is None
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
617 or not revlog.delta_config.general_delta
617 or not revlog.delta_config.general_delta
618 )
618 )
619 self.revlog = revlog
619 self.revlog = revlog
620 self.revinfo = revinfo
620 self.revinfo = revinfo
621 self.textlen = revinfo.textlen
621 self.textlen = revinfo.textlen
622 self.p1 = p1
622 self.p1 = p1
623 self.p2 = p2
623 self.p2 = p2
624 self.cachedelta = cachedelta
624 self.cachedelta = cachedelta
625 self.excluded_bases = excluded_bases
625 self.excluded_bases = excluded_bases
626 if target_rev is None:
626 if target_rev is None:
627 self.target_rev = len(self.revlog)
627 self.target_rev = len(self.revlog)
628 self.target_rev = target_rev
628 self.target_rev = target_rev
629 if snapshot_cache is None:
629 if snapshot_cache is None:
630 # map: base-rev: [snapshot-revs]
630 # map: base-rev: [snapshot-revs]
631 snapshot_cache = SnapshotCache()
631 snapshot_cache = SnapshotCache()
632 self.snapshot_cache = snapshot_cache
632 self.snapshot_cache = snapshot_cache
633
633
634 self.tested = {nullrev}
634 self.tested = {nullrev}
635
635
636 self._candidates_iterator = self._candidate_groups()
636 self._candidates_iterator = self._candidate_groups()
637 self._last_good = None
637 self._last_good = None
638 self.current_group = self._candidates_iterator.send(self._last_good)
638 self.current_group = self._candidates_iterator.send(self._last_good)
639
639
640 @property
640 @property
641 def done(self):
641 def done(self):
642 """True when all possible candidate have been tested"""
642 """True when all possible candidate have been tested"""
643 return self.current_group is None
643 return self.current_group is None
644
644
645 def next_group(self, good_delta=None):
645 def next_group(self, good_delta=None):
646 """move to the next group to test
646 """move to the next group to test
647
647
648 The group of revision to test will be available in
648 The group of revision to test will be available in
649 `self.current_group`. If the previous group had any good delta, the
649 `self.current_group`. If the previous group had any good delta, the
650 best one can be passed as the `good_delta` parameter to help selecting
650 best one can be passed as the `good_delta` parameter to help selecting
651 the next group.
651 the next group.
652
652
653 If not revision remains to be, `self.done` will be True and
653 If not revision remains to be, `self.done` will be True and
654 `self.current_group` will be None.
654 `self.current_group` will be None.
655 """
655 """
656 if good_delta is not None:
656 if good_delta is not None:
657 self._last_good = good_delta.base
657 self._last_good = good_delta.base
658 self.current_group = self._candidates_iterator.send(self._last_good)
658 self.current_group = self._candidates_iterator.send(self._last_good)
659
659
660 def _candidate_groups(self):
660 def _candidate_groups(self):
661 """Provides group of revision to be tested as delta base
661 """Provides group of revision to be tested as delta base
662
662
663 This top level function focus on emitting groups with unique and
663 This top level function focus on emitting groups with unique and
664 worthwhile content. See _raw_candidate_groups for details about the
664 worthwhile content. See _raw_candidate_groups for details about the
665 group order.
665 group order.
666 """
666 """
667 # should we try to build a delta?
667 # should we try to build a delta?
668 if not (len(self.revlog) and self.revlog._storedeltachains):
668 if not (len(self.revlog) and self.revlog._storedeltachains):
669 yield None
669 yield None
670 return
670 return
671
671
672 if not self.revlog.delta_config.general_delta:
672 if not self.revlog.delta_config.general_delta:
673 # before general delta, there is only one possible delta base
673 # before general delta, there is only one possible delta base
674 yield (self.target_rev - 1,)
674 yield (self.target_rev - 1,)
675 yield None
675 yield None
676 return
676 return
677
677
678 good = None
678 good = None
679
679
680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
681
681
682 tested = self.tested # prefetch for speed and code compactness
682 tested = self.tested # prefetch for speed and code compactness
683 candidates = self._refined_groups()
683 candidates = self._refined_groups()
684 while True:
684 while True:
685 temptative = candidates.send(good)
685 temptative = candidates.send(good)
686 if temptative is None:
686 if temptative is None:
687 break
687 break
688 group = self._pre_filter_candidate_revs(temptative)
688 group = self._pre_filter_candidate_revs(temptative)
689 if group:
689 if group:
690 # When the size of the candidate group is big, it can result in
690 # When the size of the candidate group is big, it can result in
691 # a quite significant performance impact. To reduce this, we
691 # a quite significant performance impact. To reduce this, we
692 # can send them in smaller batches until the new batch does not
692 # can send them in smaller batches until the new batch does not
693 # provide any improvements.
693 # provide any improvements.
694 #
694 #
695 # This might reduce the overall efficiency of the compression
695 # This might reduce the overall efficiency of the compression
696 # in some corner cases, but that should also prevent very
696 # in some corner cases, but that should also prevent very
697 # pathological cases from being an issue. (eg. 20 000
697 # pathological cases from being an issue. (eg. 20 000
698 # candidates).
698 # candidates).
699 #
699 #
700 # XXX note that the ordering of the group becomes important as
700 # XXX note that the ordering of the group becomes important as
701 # it now impacts the final result. The current order is
701 # it now impacts the final result. The current order is
702 # unprocessed and can be improved.
702 # unprocessed and can be improved.
703 if group_chunk_size == 0:
703 if group_chunk_size == 0:
704 tested.update(group)
704 tested.update(group)
705 good = yield tuple(group)
705 good = yield tuple(group)
706 else:
706 else:
707 prev_good = good
707 prev_good = good
708 for start in range(0, len(group), group_chunk_size):
708 for start in range(0, len(group), group_chunk_size):
709 sub_group = group[start : start + group_chunk_size]
709 sub_group = group[start : start + group_chunk_size]
710 tested.update(sub_group)
710 tested.update(sub_group)
711 good = yield tuple(sub_group)
711 good = yield tuple(sub_group)
712 if prev_good == good:
712 if prev_good == good:
713 break
713 break
714
714
715 yield None
715 yield None
716
716
717 def _pre_filter_candidate_revs(self, temptative):
717 def _pre_filter_candidate_revs(self, temptative):
718 """filter possible candidate before computing a delta
718 """filter possible candidate before computing a delta
719
719
720 This function use various criteria to pre-filter candidate delta base
720 This function use various criteria to pre-filter candidate delta base
721 before we compute a delta and evaluate its quality.
721 before we compute a delta and evaluate its quality.
722
722
723 Such pre-filter limit the number of computed delta, an expensive operation.
723 Such pre-filter limit the number of computed delta, an expensive operation.
724
724
725 return the updated list of revision to test
725 return the updated list of revision to test
726 """
726 """
727 deltalength = self.revlog.length
727 deltalength = self.revlog.length
728 deltaparent = self.revlog.deltaparent
728 deltaparent = self.revlog.deltaparent
729
729
730 tested = self.tested
730 tested = self.tested
731 group = []
731 group = []
732 for rev in temptative:
732 for rev in temptative:
733 # skip over empty delta (no need to include them in a chain)
733 # skip over empty delta (no need to include them in a chain)
734 while not (rev == nullrev or rev in tested or deltalength(rev)):
734 while not (rev == nullrev or rev in tested or deltalength(rev)):
735 tested.add(rev)
735 tested.add(rev)
736 rev = deltaparent(rev)
736 rev = deltaparent(rev)
737 if self._pre_filter_rev(rev):
737 if self._pre_filter_rev(rev):
738 group.append(rev)
738 group.append(rev)
739 else:
739 else:
740 self.tested.add(rev)
740 self.tested.add(rev)
741 return group
741 return group
742
742
743 def _pre_filter_rev_universal(self, rev):
743 def _pre_filter_rev_universal(self, rev):
744 """pre filtering that is need in all cases.
744 """pre filtering that is need in all cases.
745
745
746 return True if it seems okay to test a rev, False otherwise.
746 return True if it seems okay to test a rev, False otherwise.
747
747
748 used by _pre_filter_rev.
748 used by _pre_filter_rev.
749 """
749 """
750 # no need to try a delta against nullrev, this will be done as
750 # no need to try a delta against nullrev, this will be done as
751 # a last resort.
751 # a last resort.
752 if rev == nullrev:
752 if rev == nullrev:
753 return False
753 return False
754 # filter out revision we tested already
754 # filter out revision we tested already
755 if rev in self.tested:
755 if rev in self.tested:
756 return False
756 return False
757
757
758 # an higher authority deamed the base unworthy (e.g. censored)
758 # an higher authority deamed the base unworthy (e.g. censored)
759 if self.excluded_bases is not None and rev in self.excluded_bases:
759 if self.excluded_bases is not None and rev in self.excluded_bases:
760 return False
760 return False
761 # We are in some recomputation cases and that rev is too high
761 # We are in some recomputation cases and that rev is too high
762 # in the revlog
762 # in the revlog
763 if self.target_rev is not None and rev >= self.target_rev:
763 if self.target_rev is not None and rev >= self.target_rev:
764 return False
764 return False
765 # no delta for rawtext-changing revs (see "candelta" for why)
765 # no delta for rawtext-changing revs (see "candelta" for why)
766 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
766 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
767 return False
767 return False
768 return True
768 return True
769
769
770 def _pre_filter_rev(self, rev):
770 def _pre_filter_rev_sparse(self, rev):
771 """return True if it seems okay to test a rev, False otherwise"""
771 """pre filtering that is needed in sparse revlog cases
772 if not self._pre_filter_rev_universal(rev):
773 return False
774
772
775 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
773 return True if it seems okay to test a rev, False otherwise.
776 # filter out delta base that will never produce good delta
777 #
778 # if the delta of that base is already bigger than the limit
779 # for the delta chain size, doing a delta is hopeless.
780 if deltas_limit < self.revlog.length(rev):
781 return False
782
774
783 sparse = self.revlog.delta_config.sparse_revlog
775 used by _pre_filter_rev.
776 """
777 assert self.revlog.delta_config.sparse_revlog
784 # if the revision we test again is too small, the resulting delta
778 # if the revision we test again is too small, the resulting delta
785 # will be large anyway as that amount of data to be added is big
779 # will be large anyway as that amount of data to be added is big
786 if sparse and self.revlog.rawsize(rev) < (
780 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
787 self.textlen // LIMIT_BASE2TEXT
788 ):
789 return False
781 return False
790
782
791 # If we reach here, we are about to build and test a delta.
783 if self.revlog.delta_config.upper_bound_comp is not None:
792 # The delta building process will compute the chaininfo in all
793 # case, since that computation is cached, it is fine to access
794 # it here too.
795 chainlen, chainsize = self.revlog._chaininfo(rev)
796 # if chain will be too long, skip base
797 if (
798 self.revlog.delta_config.max_chain_len
799 and chainlen >= self.revlog.delta_config.max_chain_len
800 ):
801 return False
802 # if chain already have too much data, skip base
803 if deltas_limit < chainsize:
804 return False
805 if sparse and self.revlog.delta_config.upper_bound_comp is not None:
806 maxcomp = self.revlog.delta_config.upper_bound_comp
784 maxcomp = self.revlog.delta_config.upper_bound_comp
807 basenotsnap = (self.p1, self.p2, nullrev)
785 basenotsnap = (self.p1, self.p2, nullrev)
808 if rev not in basenotsnap and self.revlog.issnapshot(rev):
786 if rev not in basenotsnap and self.revlog.issnapshot(rev):
809 snapshotdepth = self.revlog.snapshotdepth(rev)
787 snapshotdepth = self.revlog.snapshotdepth(rev)
810 # If text is significantly larger than the base, we can
788 # If text is significantly larger than the base, we can
811 # expect the resulting delta to be proportional to the size
789 # expect the resulting delta to be proportional to the size
812 # difference
790 # difference
813 revsize = self.revlog.rawsize(rev)
791 revsize = self.revlog.rawsize(rev)
814 rawsizedistance = max(self.textlen - revsize, 0)
792 rawsizedistance = max(self.textlen - revsize, 0)
815 # use an estimate of the compression upper bound.
793 # use an estimate of the compression upper bound.
816 lowestrealisticdeltalen = rawsizedistance // maxcomp
794 lowestrealisticdeltalen = rawsizedistance // maxcomp
817
795
818 # check the absolute constraint on the delta size
796 # check the absolute constraint on the delta size
819 snapshotlimit = self.textlen >> snapshotdepth
797 snapshotlimit = self.textlen >> snapshotdepth
820 if snapshotlimit < lowestrealisticdeltalen:
798 if snapshotlimit < lowestrealisticdeltalen:
821 # delta lower bound is larger than accepted upper
799 # delta lower bound is larger than accepted upper
822 # bound
800 # bound
823 return False
801 return False
824
802
825 # check the relative constraint on the delta size
803 # check the relative constraint on the delta size
826 revlength = self.revlog.length(rev)
804 revlength = self.revlog.length(rev)
827 if revlength < lowestrealisticdeltalen:
805 if revlength < lowestrealisticdeltalen:
828 # delta probable lower bound is larger than target
806 # delta probable lower bound is larger than target
829 # base
807 # base
830 return False
808 return False
831 return True
809 return True
832
810
811 def _pre_filter_rev(self, rev):
812 """return True if it seems okay to test a rev, False otherwise"""
813 if not self._pre_filter_rev_universal(rev):
814 return False
815
816 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
817 # filter out delta base that will never produce good delta
818 #
819 # if the delta of that base is already bigger than the limit
820 # for the delta chain size, doing a delta is hopeless.
821 if deltas_limit < self.revlog.length(rev):
822 return False
823
824 # If we reach here, we are about to build and test a delta.
825 # The delta building process will compute the chaininfo in all
826 # case, since that computation is cached, it is fine to access
827 # it here too.
828 chainlen, chainsize = self.revlog._chaininfo(rev)
829 # if chain will be too long, skip base
830 if (
831 self.revlog.delta_config.max_chain_len
832 and chainlen >= self.revlog.delta_config.max_chain_len
833 ):
834 return False
835 # if chain already have too much data, skip base
836 if deltas_limit < chainsize:
837 return False
838
839 if self.revlog.delta_config.sparse_revlog:
840 if not self._pre_filter_rev_sparse(rev):
841 return False
842
843 return True
844
833 def _refined_groups(self):
845 def _refined_groups(self):
834 good = None
846 good = None
835 # First we try to reuse a the delta contained in the bundle. (or from
847 # First we try to reuse a the delta contained in the bundle. (or from
836 # the source revlog)
848 # the source revlog)
837 #
849 #
838 # This logic only applies to general delta repositories and can be
850 # This logic only applies to general delta repositories and can be
839 # disabled through configuration. Disabling reuse source delta is
851 # disabled through configuration. Disabling reuse source delta is
840 # useful when we want to make sure we recomputed "optimal" deltas.
852 # useful when we want to make sure we recomputed "optimal" deltas.
841 debug_info = None
853 debug_info = None
842 if (
854 if (
843 self.cachedelta is not None
855 self.cachedelta is not None
844 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
856 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
845 ):
857 ):
846 # Assume what we received from the server is a good choice
858 # Assume what we received from the server is a good choice
847 # build delta will reuse the cache
859 # build delta will reuse the cache
848 if debug_info is not None:
860 if debug_info is not None:
849 debug_info['cached-delta.tested'] += 1
861 debug_info['cached-delta.tested'] += 1
850 good = yield (self.cachedelta[0],)
862 good = yield (self.cachedelta[0],)
851 if good is not None:
863 if good is not None:
852 if debug_info is not None:
864 if debug_info is not None:
853 debug_info['cached-delta.accepted'] += 1
865 debug_info['cached-delta.accepted'] += 1
854 yield None
866 yield None
855 return
867 return
856 groups = self._raw_groups()
868 groups = self._raw_groups()
857 for candidates in groups:
869 for candidates in groups:
858 good = yield candidates
870 good = yield candidates
859 if good is not None:
871 if good is not None:
860 break
872 break
861
873
862 # If sparse revlog is enabled, we can try to refine the available
874 # If sparse revlog is enabled, we can try to refine the available
863 # deltas
875 # deltas
864 if not self.revlog.delta_config.sparse_revlog:
876 if not self.revlog.delta_config.sparse_revlog:
865 yield None
877 yield None
866 return
878 return
867
879
868 # if we have a refinable value, try to refine it
880 # if we have a refinable value, try to refine it
869 if (
881 if (
870 good is not None
882 good is not None
871 and good not in (self.p1, self.p2)
883 and good not in (self.p1, self.p2)
872 and self.revlog.issnapshot(good)
884 and self.revlog.issnapshot(good)
873 ):
885 ):
874 # refine snapshot down
886 # refine snapshot down
875 previous = None
887 previous = None
876 while previous != good:
888 while previous != good:
877 previous = good
889 previous = good
878 base = self.revlog.deltaparent(good)
890 base = self.revlog.deltaparent(good)
879 if base == nullrev:
891 if base == nullrev:
880 break
892 break
881 good = yield (base,)
893 good = yield (base,)
882 # refine snapshot up
894 # refine snapshot up
883 if not self.snapshot_cache.snapshots:
895 if not self.snapshot_cache.snapshots:
884 self.snapshot_cache.update(self.revlog, good + 1)
896 self.snapshot_cache.update(self.revlog, good + 1)
885 previous = None
897 previous = None
886 while good != previous:
898 while good != previous:
887 previous = good
899 previous = good
888 children = tuple(
900 children = tuple(
889 sorted(c for c in self.snapshot_cache.snapshots[good])
901 sorted(c for c in self.snapshot_cache.snapshots[good])
890 )
902 )
891 good = yield children
903 good = yield children
892
904
893 if debug_info is not None:
905 if debug_info is not None:
894 if good is None:
906 if good is None:
895 debug_info['no-solution'] += 1
907 debug_info['no-solution'] += 1
896
908
897 yield None
909 yield None
898
910
899 def _raw_groups(self):
911 def _raw_groups(self):
900 """Provides group of revision to be tested as delta base
912 """Provides group of revision to be tested as delta base
901
913
902 This lower level function focus on emitting delta theorically
914 This lower level function focus on emitting delta theorically
903 interresting without looking it any practical details.
915 interresting without looking it any practical details.
904
916
905 The group order aims at providing fast or small candidates first.
917 The group order aims at providing fast or small candidates first.
906 """
918 """
907 # Why search for delta base if we cannot use a delta base ?
919 # Why search for delta base if we cannot use a delta base ?
908 assert self.revlog.delta_config.general_delta
920 assert self.revlog.delta_config.general_delta
909 # also see issue6056
921 # also see issue6056
910 sparse = self.revlog.delta_config.sparse_revlog
922 sparse = self.revlog.delta_config.sparse_revlog
911 prev = self.target_rev - 1
923 prev = self.target_rev - 1
912 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
924 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
913
925
914 # exclude already lazy tested base if any
926 # exclude already lazy tested base if any
915 parents = [p for p in (self.p1, self.p2) if p != nullrev]
927 parents = [p for p in (self.p1, self.p2) if p != nullrev]
916
928
917 if (
929 if (
918 not self.revlog.delta_config.delta_both_parents
930 not self.revlog.delta_config.delta_both_parents
919 and len(parents) == 2
931 and len(parents) == 2
920 ):
932 ):
921 parents.sort()
933 parents.sort()
922 # To minimize the chance of having to build a fulltext,
934 # To minimize the chance of having to build a fulltext,
923 # pick first whichever parent is closest to us (max rev)
935 # pick first whichever parent is closest to us (max rev)
924 yield (parents[1],)
936 yield (parents[1],)
925 # then the other one (min rev) if the first did not fit
937 # then the other one (min rev) if the first did not fit
926 yield (parents[0],)
938 yield (parents[0],)
927 elif len(parents) > 0:
939 elif len(parents) > 0:
928 # Test all parents (1 or 2), and keep the best candidate
940 # Test all parents (1 or 2), and keep the best candidate
929 yield parents
941 yield parents
930
942
931 if sparse and parents:
943 if sparse and parents:
932 # See if we can use an existing snapshot in the parent chains to
944 # See if we can use an existing snapshot in the parent chains to
933 # use as a base for a new intermediate-snapshot
945 # use as a base for a new intermediate-snapshot
934 #
946 #
935 # search for snapshot in parents delta chain map: snapshot-level:
947 # search for snapshot in parents delta chain map: snapshot-level:
936 # snapshot-rev
948 # snapshot-rev
937 parents_snaps = collections.defaultdict(set)
949 parents_snaps = collections.defaultdict(set)
938 candidate_chains = [deltachain(p) for p in parents]
950 candidate_chains = [deltachain(p) for p in parents]
939 for chain in candidate_chains:
951 for chain in candidate_chains:
940 for idx, s in enumerate(chain):
952 for idx, s in enumerate(chain):
941 if not self.revlog.issnapshot(s):
953 if not self.revlog.issnapshot(s):
942 break
954 break
943 parents_snaps[idx].add(s)
955 parents_snaps[idx].add(s)
944 snapfloor = min(parents_snaps[0]) + 1
956 snapfloor = min(parents_snaps[0]) + 1
945 self.snapshot_cache.update(self.revlog, snapfloor)
957 self.snapshot_cache.update(self.revlog, snapfloor)
946 # search for the highest "unrelated" revision
958 # search for the highest "unrelated" revision
947 #
959 #
948 # Adding snapshots used by "unrelated" revision increase the odd we
960 # Adding snapshots used by "unrelated" revision increase the odd we
949 # reuse an independant, yet better snapshot chain.
961 # reuse an independant, yet better snapshot chain.
950 #
962 #
951 # XXX instead of building a set of revisions, we could lazily
963 # XXX instead of building a set of revisions, we could lazily
952 # enumerate over the chains. That would be more efficient, however
964 # enumerate over the chains. That would be more efficient, however
953 # we stick to simple code for now.
965 # we stick to simple code for now.
954 all_revs = set()
966 all_revs = set()
955 for chain in candidate_chains:
967 for chain in candidate_chains:
956 all_revs.update(chain)
968 all_revs.update(chain)
957 other = None
969 other = None
958 for r in self.revlog.revs(prev, snapfloor):
970 for r in self.revlog.revs(prev, snapfloor):
959 if r not in all_revs:
971 if r not in all_revs:
960 other = r
972 other = r
961 break
973 break
962 if other is not None:
974 if other is not None:
963 # To avoid unfair competition, we won't use unrelated
975 # To avoid unfair competition, we won't use unrelated
964 # intermediate snapshot that are deeper than the ones from the
976 # intermediate snapshot that are deeper than the ones from the
965 # parent delta chain.
977 # parent delta chain.
966 max_depth = max(parents_snaps.keys())
978 max_depth = max(parents_snaps.keys())
967 chain = deltachain(other)
979 chain = deltachain(other)
968 for depth, s in enumerate(chain):
980 for depth, s in enumerate(chain):
969 if s < snapfloor:
981 if s < snapfloor:
970 continue
982 continue
971 if max_depth < depth:
983 if max_depth < depth:
972 break
984 break
973 if not self.revlog.issnapshot(s):
985 if not self.revlog.issnapshot(s):
974 break
986 break
975 parents_snaps[depth].add(s)
987 parents_snaps[depth].add(s)
976 # Test them as possible intermediate snapshot base We test them
988 # Test them as possible intermediate snapshot base We test them
977 # from highest to lowest level. High level one are more likely to
989 # from highest to lowest level. High level one are more likely to
978 # result in small delta
990 # result in small delta
979 floor = None
991 floor = None
980 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
992 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
981 siblings = set()
993 siblings = set()
982 for s in snaps:
994 for s in snaps:
983 siblings.update(self.snapshot_cache.snapshots[s])
995 siblings.update(self.snapshot_cache.snapshots[s])
984 # Before considering making a new intermediate snapshot, we
996 # Before considering making a new intermediate snapshot, we
985 # check if an existing snapshot, children of base we consider,
997 # check if an existing snapshot, children of base we consider,
986 # would be suitable.
998 # would be suitable.
987 #
999 #
988 # It give a change to reuse a delta chain "unrelated" to the
1000 # It give a change to reuse a delta chain "unrelated" to the
989 # current revision instead of starting our own. Without such
1001 # current revision instead of starting our own. Without such
990 # re-use, topological branches would keep reopening new chains.
1002 # re-use, topological branches would keep reopening new chains.
991 # Creating more and more snapshot as the repository grow.
1003 # Creating more and more snapshot as the repository grow.
992
1004
993 if floor is not None:
1005 if floor is not None:
994 # We only do this for siblings created after the one in our
1006 # We only do this for siblings created after the one in our
995 # parent's delta chain. Those created before has less
1007 # parent's delta chain. Those created before has less
996 # chances to be valid base since our ancestors had to
1008 # chances to be valid base since our ancestors had to
997 # create a new snapshot.
1009 # create a new snapshot.
998 siblings = [r for r in siblings if floor < r]
1010 siblings = [r for r in siblings if floor < r]
999 yield tuple(sorted(siblings))
1011 yield tuple(sorted(siblings))
1000 # then test the base from our parent's delta chain.
1012 # then test the base from our parent's delta chain.
1001 yield tuple(sorted(snaps))
1013 yield tuple(sorted(snaps))
1002 floor = min(snaps)
1014 floor = min(snaps)
1003 # No suitable base found in the parent chain, search if any full
1015 # No suitable base found in the parent chain, search if any full
1004 # snapshots emitted since parent's base would be a suitable base
1016 # snapshots emitted since parent's base would be a suitable base
1005 # for an intermediate snapshot.
1017 # for an intermediate snapshot.
1006 #
1018 #
1007 # It give a chance to reuse a delta chain unrelated to the current
1019 # It give a chance to reuse a delta chain unrelated to the current
1008 # revisions instead of starting our own. Without such re-use,
1020 # revisions instead of starting our own. Without such re-use,
1009 # topological branches would keep reopening new full chains.
1021 # topological branches would keep reopening new full chains.
1010 # Creating more and more snapshot as the repository grow.
1022 # Creating more and more snapshot as the repository grow.
1011 full = [
1023 full = [
1012 r
1024 r
1013 for r in self.snapshot_cache.snapshots[nullrev]
1025 for r in self.snapshot_cache.snapshots[nullrev]
1014 if snapfloor <= r
1026 if snapfloor <= r
1015 ]
1027 ]
1016 yield tuple(sorted(full))
1028 yield tuple(sorted(full))
1017
1029
1018 if not sparse:
1030 if not sparse:
1019 # other approach failed try against prev to hopefully save us a
1031 # other approach failed try against prev to hopefully save us a
1020 # fulltext.
1032 # fulltext.
1021 yield (prev,)
1033 yield (prev,)
1022
1034
1023 def is_good_delta_info(self, deltainfo):
1035 def is_good_delta_info(self, deltainfo):
1024 """Returns True if the given delta is good. Good means that it is
1036 """Returns True if the given delta is good. Good means that it is
1025 within the disk span, disk size, and chain length bounds that we know
1037 within the disk span, disk size, and chain length bounds that we know
1026 to be performant."""
1038 to be performant."""
1027 if deltainfo is None:
1039 if deltainfo is None:
1028 return False
1040 return False
1029
1041
1030 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
1042 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
1031 # so we should never end up asking such question. Adding the assert as
1043 # so we should never end up asking such question. Adding the assert as
1032 # a safe-guard to detect anything that would be fishy in this regard.
1044 # a safe-guard to detect anything that would be fishy in this regard.
1033 assert (
1045 assert (
1034 self.revinfo.cachedelta is None
1046 self.revinfo.cachedelta is None
1035 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
1047 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
1036 or not self.revlog.delta_config.general_delta
1048 or not self.revlog.delta_config.general_delta
1037 )
1049 )
1038
1050
1039 # - 'deltainfo.distance' is the distance from the base revision --
1051 # - 'deltainfo.distance' is the distance from the base revision --
1040 # bounding it limits the amount of I/O we need to do.
1052 # bounding it limits the amount of I/O we need to do.
1041 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
1053 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
1042 # deltas we need to apply -- bounding it limits the amount of CPU
1054 # deltas we need to apply -- bounding it limits the amount of CPU
1043 # we consume.
1055 # we consume.
1044
1056
1045 textlen = self.revinfo.textlen
1057 textlen = self.revinfo.textlen
1046 defaultmax = textlen * 4
1058 defaultmax = textlen * 4
1047 maxdist = self.revlog.delta_config.max_deltachain_span
1059 maxdist = self.revlog.delta_config.max_deltachain_span
1048 if not maxdist:
1060 if not maxdist:
1049 maxdist = deltainfo.distance # ensure the conditional pass
1061 maxdist = deltainfo.distance # ensure the conditional pass
1050 maxdist = max(maxdist, defaultmax)
1062 maxdist = max(maxdist, defaultmax)
1051
1063
1052 # Bad delta from read span:
1064 # Bad delta from read span:
1053 #
1065 #
1054 # If the span of data read is larger than the maximum allowed.
1066 # If the span of data read is larger than the maximum allowed.
1055 #
1067 #
1056 # In the sparse-revlog case, we rely on the associated "sparse
1068 # In the sparse-revlog case, we rely on the associated "sparse
1057 # reading" to avoid issue related to the span of data. In theory, it
1069 # reading" to avoid issue related to the span of data. In theory, it
1058 # would be possible to build pathological revlog where delta pattern
1070 # would be possible to build pathological revlog where delta pattern
1059 # would lead to too many reads. However, they do not happen in
1071 # would lead to too many reads. However, they do not happen in
1060 # practice at all. So we skip the span check entirely.
1072 # practice at all. So we skip the span check entirely.
1061 if (
1073 if (
1062 not self.revlog.delta_config.sparse_revlog
1074 not self.revlog.delta_config.sparse_revlog
1063 and maxdist < deltainfo.distance
1075 and maxdist < deltainfo.distance
1064 ):
1076 ):
1065 return False
1077 return False
1066
1078
1067 # Bad delta from new delta size:
1079 # Bad delta from new delta size:
1068 #
1080 #
1069 # If the delta size is larger than the target text, storing the delta
1081 # If the delta size is larger than the target text, storing the delta
1070 # will be inefficient.
1082 # will be inefficient.
1071 if textlen < deltainfo.deltalen:
1083 if textlen < deltainfo.deltalen:
1072 return False
1084 return False
1073
1085
1074 # Bad delta from cumulated payload size:
1086 # Bad delta from cumulated payload size:
1075 #
1087 #
1076 # If the sum of delta get larger than K * target text length.
1088 # If the sum of delta get larger than K * target text length.
1077 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1089 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1078 return False
1090 return False
1079
1091
1080 # Bad delta from chain length:
1092 # Bad delta from chain length:
1081 #
1093 #
1082 # If the number of delta in the chain gets too high.
1094 # If the number of delta in the chain gets too high.
1083 if (
1095 if (
1084 self.revlog.delta_config.max_chain_len
1096 self.revlog.delta_config.max_chain_len
1085 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1097 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1086 ):
1098 ):
1087 return False
1099 return False
1088
1100
1089 # bad delta from intermediate snapshot size limit
1101 # bad delta from intermediate snapshot size limit
1090 #
1102 #
1091 # If an intermediate snapshot size is higher than the limit. The
1103 # If an intermediate snapshot size is higher than the limit. The
1092 # limit exist to prevent endless chain of intermediate delta to be
1104 # limit exist to prevent endless chain of intermediate delta to be
1093 # created.
1105 # created.
1094 if (
1106 if (
1095 deltainfo.snapshotdepth is not None
1107 deltainfo.snapshotdepth is not None
1096 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1108 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1097 ):
1109 ):
1098 return False
1110 return False
1099
1111
1100 # bad delta if new intermediate snapshot is larger than the previous
1112 # bad delta if new intermediate snapshot is larger than the previous
1101 # snapshot
1113 # snapshot
1102 if (
1114 if (
1103 deltainfo.snapshotdepth
1115 deltainfo.snapshotdepth
1104 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1116 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1105 ):
1117 ):
1106 return False
1118 return False
1107
1119
1108 return True
1120 return True
1109
1121
1110
1122
1111 class SnapshotCache:
1123 class SnapshotCache:
1112 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1124 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1113
1125
1114 def __init__(self):
1126 def __init__(self):
1115 self.snapshots = collections.defaultdict(set)
1127 self.snapshots = collections.defaultdict(set)
1116 self._start_rev = None
1128 self._start_rev = None
1117 self._end_rev = None
1129 self._end_rev = None
1118
1130
1119 def update(self, revlog, start_rev=0):
1131 def update(self, revlog, start_rev=0):
1120 """find snapshots from start_rev to tip"""
1132 """find snapshots from start_rev to tip"""
1121 nb_revs = len(revlog)
1133 nb_revs = len(revlog)
1122 end_rev = nb_revs - 1
1134 end_rev = nb_revs - 1
1123 if start_rev > end_rev:
1135 if start_rev > end_rev:
1124 return # range is empty
1136 return # range is empty
1125
1137
1126 if self._start_rev is None:
1138 if self._start_rev is None:
1127 assert self._end_rev is None
1139 assert self._end_rev is None
1128 self._update(revlog, start_rev, end_rev)
1140 self._update(revlog, start_rev, end_rev)
1129 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1141 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1130 if start_rev < self._start_rev:
1142 if start_rev < self._start_rev:
1131 self._update(revlog, start_rev, self._start_rev - 1)
1143 self._update(revlog, start_rev, self._start_rev - 1)
1132 if self._end_rev < end_rev:
1144 if self._end_rev < end_rev:
1133 self._update(revlog, self._end_rev + 1, end_rev)
1145 self._update(revlog, self._end_rev + 1, end_rev)
1134
1146
1135 if self._start_rev is None:
1147 if self._start_rev is None:
1136 assert self._end_rev is None
1148 assert self._end_rev is None
1137 self._end_rev = end_rev
1149 self._end_rev = end_rev
1138 self._start_rev = start_rev
1150 self._start_rev = start_rev
1139 else:
1151 else:
1140 self._start_rev = min(self._start_rev, start_rev)
1152 self._start_rev = min(self._start_rev, start_rev)
1141 self._end_rev = max(self._end_rev, end_rev)
1153 self._end_rev = max(self._end_rev, end_rev)
1142 assert self._start_rev <= self._end_rev, (
1154 assert self._start_rev <= self._end_rev, (
1143 self._start_rev,
1155 self._start_rev,
1144 self._end_rev,
1156 self._end_rev,
1145 )
1157 )
1146
1158
1147 def _update(self, revlog, start_rev, end_rev):
1159 def _update(self, revlog, start_rev, end_rev):
1148 """internal method that actually do update content"""
1160 """internal method that actually do update content"""
1149 assert self._start_rev is None or (
1161 assert self._start_rev is None or (
1150 start_rev < self._start_rev or start_rev > self._end_rev
1162 start_rev < self._start_rev or start_rev > self._end_rev
1151 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1163 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1152 assert self._start_rev is None or (
1164 assert self._start_rev is None or (
1153 end_rev < self._start_rev or end_rev > self._end_rev
1165 end_rev < self._start_rev or end_rev > self._end_rev
1154 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1166 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1155 cache = self.snapshots
1167 cache = self.snapshots
1156 if hasattr(revlog.index, 'findsnapshots'):
1168 if hasattr(revlog.index, 'findsnapshots'):
1157 revlog.index.findsnapshots(cache, start_rev, end_rev)
1169 revlog.index.findsnapshots(cache, start_rev, end_rev)
1158 else:
1170 else:
1159 deltaparent = revlog.deltaparent
1171 deltaparent = revlog.deltaparent
1160 issnapshot = revlog.issnapshot
1172 issnapshot = revlog.issnapshot
1161 for rev in revlog.revs(start_rev, end_rev):
1173 for rev in revlog.revs(start_rev, end_rev):
1162 if issnapshot(rev):
1174 if issnapshot(rev):
1163 cache[deltaparent(rev)].add(rev)
1175 cache[deltaparent(rev)].add(rev)
1164
1176
1165
1177
1166 class deltacomputer:
1178 class deltacomputer:
1167 """object capable of computing delta and finding delta for multiple revision
1179 """object capable of computing delta and finding delta for multiple revision
1168
1180
1169 This object is meant to compute and find multiple delta applied to the same
1181 This object is meant to compute and find multiple delta applied to the same
1170 revlog.
1182 revlog.
1171 """
1183 """
1172
1184
1173 def __init__(
1185 def __init__(
1174 self,
1186 self,
1175 revlog,
1187 revlog,
1176 write_debug=None,
1188 write_debug=None,
1177 debug_search=False,
1189 debug_search=False,
1178 debug_info=None,
1190 debug_info=None,
1179 ):
1191 ):
1180 self.revlog = revlog
1192 self.revlog = revlog
1181 self._write_debug = write_debug
1193 self._write_debug = write_debug
1182 if write_debug is None:
1194 if write_debug is None:
1183 self._debug_search = False
1195 self._debug_search = False
1184 else:
1196 else:
1185 self._debug_search = debug_search
1197 self._debug_search = debug_search
1186 self._debug_info = debug_info
1198 self._debug_info = debug_info
1187 self._snapshot_cache = SnapshotCache()
1199 self._snapshot_cache = SnapshotCache()
1188
1200
1189 @property
1201 @property
1190 def _gather_debug(self):
1202 def _gather_debug(self):
1191 return self._write_debug is not None or self._debug_info is not None
1203 return self._write_debug is not None or self._debug_info is not None
1192
1204
1193 def buildtext(self, revinfo):
1205 def buildtext(self, revinfo):
1194 """Builds a fulltext version of a revision
1206 """Builds a fulltext version of a revision
1195
1207
1196 revinfo: revisioninfo instance that contains all needed info
1208 revinfo: revisioninfo instance that contains all needed info
1197 """
1209 """
1198 btext = revinfo.btext
1210 btext = revinfo.btext
1199 if btext[0] is not None:
1211 if btext[0] is not None:
1200 return btext[0]
1212 return btext[0]
1201
1213
1202 revlog = self.revlog
1214 revlog = self.revlog
1203 cachedelta = revinfo.cachedelta
1215 cachedelta = revinfo.cachedelta
1204 baserev = cachedelta[0]
1216 baserev = cachedelta[0]
1205 delta = cachedelta[1]
1217 delta = cachedelta[1]
1206
1218
1207 fulltext = btext[0] = _textfromdelta(
1219 fulltext = btext[0] = _textfromdelta(
1208 revlog,
1220 revlog,
1209 baserev,
1221 baserev,
1210 delta,
1222 delta,
1211 revinfo.p1,
1223 revinfo.p1,
1212 revinfo.p2,
1224 revinfo.p2,
1213 revinfo.flags,
1225 revinfo.flags,
1214 revinfo.node,
1226 revinfo.node,
1215 )
1227 )
1216 return fulltext
1228 return fulltext
1217
1229
1218 def _builddeltadiff(self, base, revinfo):
1230 def _builddeltadiff(self, base, revinfo):
1219 revlog = self.revlog
1231 revlog = self.revlog
1220 t = self.buildtext(revinfo)
1232 t = self.buildtext(revinfo)
1221 if revlog.iscensored(base):
1233 if revlog.iscensored(base):
1222 # deltas based on a censored revision must replace the
1234 # deltas based on a censored revision must replace the
1223 # full content in one patch, so delta works everywhere
1235 # full content in one patch, so delta works everywhere
1224 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1236 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1225 delta = header + t
1237 delta = header + t
1226 else:
1238 else:
1227 ptext = revlog.rawdata(base)
1239 ptext = revlog.rawdata(base)
1228 delta = mdiff.textdiff(ptext, t)
1240 delta = mdiff.textdiff(ptext, t)
1229
1241
1230 return delta
1242 return delta
1231
1243
1232 def _builddeltainfo(self, revinfo, base, target_rev=None):
1244 def _builddeltainfo(self, revinfo, base, target_rev=None):
1233 # can we use the cached delta?
1245 # can we use the cached delta?
1234 revlog = self.revlog
1246 revlog = self.revlog
1235 chainbase = revlog.chainbase(base)
1247 chainbase = revlog.chainbase(base)
1236 if revlog.delta_config.general_delta:
1248 if revlog.delta_config.general_delta:
1237 deltabase = base
1249 deltabase = base
1238 else:
1250 else:
1239 if target_rev is not None and base != target_rev - 1:
1251 if target_rev is not None and base != target_rev - 1:
1240 msg = (
1252 msg = (
1241 b'general delta cannot use delta for something else '
1253 b'general delta cannot use delta for something else '
1242 b'than `prev`: %d<-%d'
1254 b'than `prev`: %d<-%d'
1243 )
1255 )
1244 msg %= (base, target_rev)
1256 msg %= (base, target_rev)
1245 raise error.ProgrammingError(msg)
1257 raise error.ProgrammingError(msg)
1246 deltabase = chainbase
1258 deltabase = chainbase
1247 snapshotdepth = None
1259 snapshotdepth = None
1248 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1260 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1249 snapshotdepth = 0
1261 snapshotdepth = 0
1250 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1262 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1251 # A delta chain should always be one full snapshot,
1263 # A delta chain should always be one full snapshot,
1252 # zero or more semi-snapshots, and zero or more deltas
1264 # zero or more semi-snapshots, and zero or more deltas
1253 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1265 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1254 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1266 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1255 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1267 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1256 delta = None
1268 delta = None
1257 if revinfo.cachedelta:
1269 if revinfo.cachedelta:
1258 cachebase = revinfo.cachedelta[0]
1270 cachebase = revinfo.cachedelta[0]
1259 # check if the diff still apply
1271 # check if the diff still apply
1260 currentbase = cachebase
1272 currentbase = cachebase
1261 while (
1273 while (
1262 currentbase != nullrev
1274 currentbase != nullrev
1263 and currentbase != base
1275 and currentbase != base
1264 and self.revlog.length(currentbase) == 0
1276 and self.revlog.length(currentbase) == 0
1265 ):
1277 ):
1266 currentbase = self.revlog.deltaparent(currentbase)
1278 currentbase = self.revlog.deltaparent(currentbase)
1267 if self.revlog.delta_config.lazy_delta and currentbase == base:
1279 if self.revlog.delta_config.lazy_delta and currentbase == base:
1268 delta = revinfo.cachedelta[1]
1280 delta = revinfo.cachedelta[1]
1269 if delta is None:
1281 if delta is None:
1270 delta = self._builddeltadiff(base, revinfo)
1282 delta = self._builddeltadiff(base, revinfo)
1271 if self._debug_search:
1283 if self._debug_search:
1272 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1284 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1273 msg %= len(delta)
1285 msg %= len(delta)
1274 self._write_debug(msg)
1286 self._write_debug(msg)
1275 # snapshotdept need to be neither None nor 0 level snapshot
1287 # snapshotdept need to be neither None nor 0 level snapshot
1276 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1288 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1277 lowestrealisticdeltalen = (
1289 lowestrealisticdeltalen = (
1278 len(delta) // revlog.delta_config.upper_bound_comp
1290 len(delta) // revlog.delta_config.upper_bound_comp
1279 )
1291 )
1280 snapshotlimit = revinfo.textlen >> snapshotdepth
1292 snapshotlimit = revinfo.textlen >> snapshotdepth
1281 if self._debug_search:
1293 if self._debug_search:
1282 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1294 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1283 msg %= lowestrealisticdeltalen
1295 msg %= lowestrealisticdeltalen
1284 self._write_debug(msg)
1296 self._write_debug(msg)
1285 if snapshotlimit < lowestrealisticdeltalen:
1297 if snapshotlimit < lowestrealisticdeltalen:
1286 if self._debug_search:
1298 if self._debug_search:
1287 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1299 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1288 self._write_debug(msg)
1300 self._write_debug(msg)
1289 return None
1301 return None
1290 if revlog.length(base) < lowestrealisticdeltalen:
1302 if revlog.length(base) < lowestrealisticdeltalen:
1291 if self._debug_search:
1303 if self._debug_search:
1292 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1304 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1293 self._write_debug(msg)
1305 self._write_debug(msg)
1294 return None
1306 return None
1295 header, data = revlog._inner.compress(delta)
1307 header, data = revlog._inner.compress(delta)
1296 deltalen = len(header) + len(data)
1308 deltalen = len(header) + len(data)
1297 offset = revlog.end(len(revlog) - 1)
1309 offset = revlog.end(len(revlog) - 1)
1298 dist = deltalen + offset - revlog.start(chainbase)
1310 dist = deltalen + offset - revlog.start(chainbase)
1299 chainlen, compresseddeltalen = revlog._chaininfo(base)
1311 chainlen, compresseddeltalen = revlog._chaininfo(base)
1300 chainlen += 1
1312 chainlen += 1
1301 compresseddeltalen += deltalen
1313 compresseddeltalen += deltalen
1302
1314
1303 return _deltainfo(
1315 return _deltainfo(
1304 dist,
1316 dist,
1305 deltalen,
1317 deltalen,
1306 (header, data),
1318 (header, data),
1307 deltabase,
1319 deltabase,
1308 chainbase,
1320 chainbase,
1309 chainlen,
1321 chainlen,
1310 compresseddeltalen,
1322 compresseddeltalen,
1311 snapshotdepth,
1323 snapshotdepth,
1312 )
1324 )
1313
1325
1314 def _fullsnapshotinfo(self, revinfo, curr):
1326 def _fullsnapshotinfo(self, revinfo, curr):
1315 rawtext = self.buildtext(revinfo)
1327 rawtext = self.buildtext(revinfo)
1316 data = self.revlog._inner.compress(rawtext)
1328 data = self.revlog._inner.compress(rawtext)
1317 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1329 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1318 deltabase = chainbase = curr
1330 deltabase = chainbase = curr
1319 snapshotdepth = 0
1331 snapshotdepth = 0
1320 chainlen = 1
1332 chainlen = 1
1321
1333
1322 return _deltainfo(
1334 return _deltainfo(
1323 dist,
1335 dist,
1324 deltalen,
1336 deltalen,
1325 data,
1337 data,
1326 deltabase,
1338 deltabase,
1327 chainbase,
1339 chainbase,
1328 chainlen,
1340 chainlen,
1329 compresseddeltalen,
1341 compresseddeltalen,
1330 snapshotdepth,
1342 snapshotdepth,
1331 )
1343 )
1332
1344
1333 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1345 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1334 """Find an acceptable delta against a candidate revision
1346 """Find an acceptable delta against a candidate revision
1335
1347
1336 revinfo: information about the revision (instance of _revisioninfo)
1348 revinfo: information about the revision (instance of _revisioninfo)
1337
1349
1338 Returns the first acceptable candidate revision, as ordered by
1350 Returns the first acceptable candidate revision, as ordered by
1339 _candidategroups
1351 _candidategroups
1340
1352
1341 If no suitable deltabase is found, we return delta info for a full
1353 If no suitable deltabase is found, we return delta info for a full
1342 snapshot.
1354 snapshot.
1343
1355
1344 `excluded_bases` is an optional set of revision that cannot be used as
1356 `excluded_bases` is an optional set of revision that cannot be used as
1345 a delta base. Use this to recompute delta suitable in censor or strip
1357 a delta base. Use this to recompute delta suitable in censor or strip
1346 context.
1358 context.
1347 """
1359 """
1348 if target_rev is None:
1360 if target_rev is None:
1349 target_rev = len(self.revlog)
1361 target_rev = len(self.revlog)
1350
1362
1351 gather_debug = self._gather_debug
1363 gather_debug = self._gather_debug
1352 cachedelta = revinfo.cachedelta
1364 cachedelta = revinfo.cachedelta
1353 revlog = self.revlog
1365 revlog = self.revlog
1354 p1r = p2r = None
1366 p1r = p2r = None
1355
1367
1356 if excluded_bases is None:
1368 if excluded_bases is None:
1357 excluded_bases = set()
1369 excluded_bases = set()
1358
1370
1359 if gather_debug:
1371 if gather_debug:
1360 start = util.timer()
1372 start = util.timer()
1361 dbg = self._one_dbg_data()
1373 dbg = self._one_dbg_data()
1362 dbg['revision'] = target_rev
1374 dbg['revision'] = target_rev
1363 p1r = revlog.rev(revinfo.p1)
1375 p1r = revlog.rev(revinfo.p1)
1364 p2r = revlog.rev(revinfo.p2)
1376 p2r = revlog.rev(revinfo.p2)
1365 if p1r != nullrev:
1377 if p1r != nullrev:
1366 p1_chain_len = revlog._chaininfo(p1r)[0]
1378 p1_chain_len = revlog._chaininfo(p1r)[0]
1367 else:
1379 else:
1368 p1_chain_len = -1
1380 p1_chain_len = -1
1369 if p2r != nullrev:
1381 if p2r != nullrev:
1370 p2_chain_len = revlog._chaininfo(p2r)[0]
1382 p2_chain_len = revlog._chaininfo(p2r)[0]
1371 else:
1383 else:
1372 p2_chain_len = -1
1384 p2_chain_len = -1
1373 dbg['p1-chain-len'] = p1_chain_len
1385 dbg['p1-chain-len'] = p1_chain_len
1374 dbg['p2-chain-len'] = p2_chain_len
1386 dbg['p2-chain-len'] = p2_chain_len
1375
1387
1376 # 1) if the revision is empty, no amount of delta can beat it
1388 # 1) if the revision is empty, no amount of delta can beat it
1377 #
1389 #
1378 # 2) no delta for flag processor revision (see "candelta" for why)
1390 # 2) no delta for flag processor revision (see "candelta" for why)
1379 # not calling candelta since only one revision needs test, also to
1391 # not calling candelta since only one revision needs test, also to
1380 # avoid overhead fetching flags again.
1392 # avoid overhead fetching flags again.
1381 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1393 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1382 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1394 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1383 if gather_debug:
1395 if gather_debug:
1384 end = util.timer()
1396 end = util.timer()
1385 dbg['duration'] = end - start
1397 dbg['duration'] = end - start
1386 dbg[
1398 dbg[
1387 'delta-base'
1399 'delta-base'
1388 ] = deltainfo.base # pytype: disable=attribute-error
1400 ] = deltainfo.base # pytype: disable=attribute-error
1389 dbg['search_round_count'] = 0
1401 dbg['search_round_count'] = 0
1390 dbg['using-cached-base'] = False
1402 dbg['using-cached-base'] = False
1391 dbg['delta_try_count'] = 0
1403 dbg['delta_try_count'] = 0
1392 dbg['type'] = b"full"
1404 dbg['type'] = b"full"
1393 dbg['snapshot-depth'] = 0
1405 dbg['snapshot-depth'] = 0
1394 self._dbg_process_data(dbg)
1406 self._dbg_process_data(dbg)
1395 return deltainfo
1407 return deltainfo
1396
1408
1397 deltainfo = None
1409 deltainfo = None
1398
1410
1399 # If this source delta are to be forcibly reuse, let us comply early.
1411 # If this source delta are to be forcibly reuse, let us comply early.
1400 if (
1412 if (
1401 revlog.delta_config.general_delta
1413 revlog.delta_config.general_delta
1402 and revinfo.cachedelta is not None
1414 and revinfo.cachedelta is not None
1403 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1415 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1404 ):
1416 ):
1405 base = revinfo.cachedelta[0]
1417 base = revinfo.cachedelta[0]
1406 if base == nullrev:
1418 if base == nullrev:
1407 dbg_type = b"full"
1419 dbg_type = b"full"
1408 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1420 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1409 if gather_debug:
1421 if gather_debug:
1410 snapshotdepth = 0
1422 snapshotdepth = 0
1411 elif base not in excluded_bases:
1423 elif base not in excluded_bases:
1412 delta = revinfo.cachedelta[1]
1424 delta = revinfo.cachedelta[1]
1413 header, data = revlog.compress(delta)
1425 header, data = revlog.compress(delta)
1414 deltalen = len(header) + len(data)
1426 deltalen = len(header) + len(data)
1415 if gather_debug:
1427 if gather_debug:
1416 offset = revlog.end(len(revlog) - 1)
1428 offset = revlog.end(len(revlog) - 1)
1417 chainbase = revlog.chainbase(base)
1429 chainbase = revlog.chainbase(base)
1418 distance = deltalen + offset - revlog.start(chainbase)
1430 distance = deltalen + offset - revlog.start(chainbase)
1419 chainlen, compresseddeltalen = revlog._chaininfo(base)
1431 chainlen, compresseddeltalen = revlog._chaininfo(base)
1420 chainlen += 1
1432 chainlen += 1
1421 compresseddeltalen += deltalen
1433 compresseddeltalen += deltalen
1422 if base == p1r or base == p2r:
1434 if base == p1r or base == p2r:
1423 dbg_type = b"delta"
1435 dbg_type = b"delta"
1424 snapshotdepth = None
1436 snapshotdepth = None
1425 elif not revlog.issnapshot(base):
1437 elif not revlog.issnapshot(base):
1426 snapshotdepth = None
1438 snapshotdepth = None
1427 else:
1439 else:
1428 dbg_type = b"snapshot"
1440 dbg_type = b"snapshot"
1429 snapshotdepth = revlog.snapshotdepth(base) + 1
1441 snapshotdepth = revlog.snapshotdepth(base) + 1
1430 else:
1442 else:
1431 distance = None
1443 distance = None
1432 chainbase = None
1444 chainbase = None
1433 chainlen = None
1445 chainlen = None
1434 compresseddeltalen = None
1446 compresseddeltalen = None
1435 snapshotdepth = None
1447 snapshotdepth = None
1436 deltainfo = _deltainfo(
1448 deltainfo = _deltainfo(
1437 distance=distance,
1449 distance=distance,
1438 deltalen=deltalen,
1450 deltalen=deltalen,
1439 data=(header, data),
1451 data=(header, data),
1440 base=base,
1452 base=base,
1441 chainbase=chainbase,
1453 chainbase=chainbase,
1442 chainlen=chainlen,
1454 chainlen=chainlen,
1443 compresseddeltalen=compresseddeltalen,
1455 compresseddeltalen=compresseddeltalen,
1444 snapshotdepth=snapshotdepth,
1456 snapshotdepth=snapshotdepth,
1445 )
1457 )
1446
1458
1447 if deltainfo is not None:
1459 if deltainfo is not None:
1448 if gather_debug:
1460 if gather_debug:
1449 end = util.timer()
1461 end = util.timer()
1450 dbg['duration'] = end - start
1462 dbg['duration'] = end - start
1451 dbg[
1463 dbg[
1452 'delta-base'
1464 'delta-base'
1453 ] = deltainfo.base # pytype: disable=attribute-error
1465 ] = deltainfo.base # pytype: disable=attribute-error
1454 dbg['search_round_count'] = 0
1466 dbg['search_round_count'] = 0
1455 dbg['using-cached-base'] = True
1467 dbg['using-cached-base'] = True
1456 dbg['delta_try_count'] = 0
1468 dbg['delta_try_count'] = 0
1457 dbg['type'] = b"full"
1469 dbg['type'] = b"full"
1458 if snapshotdepth is None:
1470 if snapshotdepth is None:
1459 dbg['snapshot-depth'] = -1
1471 dbg['snapshot-depth'] = -1
1460 else:
1472 else:
1461 dbg['snapshot-depth'] = snapshotdepth
1473 dbg['snapshot-depth'] = snapshotdepth
1462 self._dbg_process_data(dbg)
1474 self._dbg_process_data(dbg)
1463 return deltainfo
1475 return deltainfo
1464
1476
1465 # count the number of different delta we tried (for debug purpose)
1477 # count the number of different delta we tried (for debug purpose)
1466 dbg_try_count = 0
1478 dbg_try_count = 0
1467 # count the number of "search round" we did. (for debug purpose)
1479 # count the number of "search round" we did. (for debug purpose)
1468 dbg_try_rounds = 0
1480 dbg_try_rounds = 0
1469 dbg_type = b'unknown'
1481 dbg_type = b'unknown'
1470
1482
1471 if p1r is None:
1483 if p1r is None:
1472 p1r = revlog.rev(revinfo.p1)
1484 p1r = revlog.rev(revinfo.p1)
1473 p2r = revlog.rev(revinfo.p2)
1485 p2r = revlog.rev(revinfo.p2)
1474
1486
1475 if self._debug_search:
1487 if self._debug_search:
1476 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1488 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1477 msg %= target_rev
1489 msg %= target_rev
1478 self._write_debug(msg)
1490 self._write_debug(msg)
1479
1491
1480 search = _DeltaSearch(
1492 search = _DeltaSearch(
1481 self.revlog,
1493 self.revlog,
1482 revinfo,
1494 revinfo,
1483 p1r,
1495 p1r,
1484 p2r,
1496 p2r,
1485 cachedelta,
1497 cachedelta,
1486 excluded_bases,
1498 excluded_bases,
1487 target_rev,
1499 target_rev,
1488 snapshot_cache=self._snapshot_cache,
1500 snapshot_cache=self._snapshot_cache,
1489 )
1501 )
1490
1502
1491 while not search.done:
1503 while not search.done:
1492 current_group = search.current_group
1504 current_group = search.current_group
1493 # current_group can be `None`, but not is search.done is False
1505 # current_group can be `None`, but not is search.done is False
1494 # We add this assert to help pytype
1506 # We add this assert to help pytype
1495 assert current_group is not None
1507 assert current_group is not None
1496 candidaterevs = current_group
1508 candidaterevs = current_group
1497 dbg_try_rounds += 1
1509 dbg_try_rounds += 1
1498 if self._debug_search:
1510 if self._debug_search:
1499 prev = None
1511 prev = None
1500 if deltainfo is not None:
1512 if deltainfo is not None:
1501 prev = deltainfo.base
1513 prev = deltainfo.base
1502
1514
1503 if (
1515 if (
1504 cachedelta is not None
1516 cachedelta is not None
1505 and len(candidaterevs) == 1
1517 and len(candidaterevs) == 1
1506 and cachedelta[0] in candidaterevs
1518 and cachedelta[0] in candidaterevs
1507 ):
1519 ):
1508 round_type = b"cached-delta"
1520 round_type = b"cached-delta"
1509 elif p1r in candidaterevs or p2r in candidaterevs:
1521 elif p1r in candidaterevs or p2r in candidaterevs:
1510 round_type = b"parents"
1522 round_type = b"parents"
1511 elif prev is not None and all(c < prev for c in candidaterevs):
1523 elif prev is not None and all(c < prev for c in candidaterevs):
1512 round_type = b"refine-down"
1524 round_type = b"refine-down"
1513 elif prev is not None and all(c > prev for c in candidaterevs):
1525 elif prev is not None and all(c > prev for c in candidaterevs):
1514 round_type = b"refine-up"
1526 round_type = b"refine-up"
1515 else:
1527 else:
1516 round_type = b"search-down"
1528 round_type = b"search-down"
1517 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1529 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1518 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1530 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1519 self._write_debug(msg)
1531 self._write_debug(msg)
1520 nominateddeltas = []
1532 nominateddeltas = []
1521 if deltainfo is not None:
1533 if deltainfo is not None:
1522 if self._debug_search:
1534 if self._debug_search:
1523 msg = (
1535 msg = (
1524 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1536 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1525 )
1537 )
1526 msg %= (deltainfo.base, deltainfo.deltalen)
1538 msg %= (deltainfo.base, deltainfo.deltalen)
1527 self._write_debug(msg)
1539 self._write_debug(msg)
1528 # if we already found a good delta,
1540 # if we already found a good delta,
1529 # challenge it against refined candidates
1541 # challenge it against refined candidates
1530 nominateddeltas.append(deltainfo)
1542 nominateddeltas.append(deltainfo)
1531 for candidaterev in candidaterevs:
1543 for candidaterev in candidaterevs:
1532 if self._debug_search:
1544 if self._debug_search:
1533 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1545 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1534 msg %= candidaterev
1546 msg %= candidaterev
1535 self._write_debug(msg)
1547 self._write_debug(msg)
1536 candidate_type = None
1548 candidate_type = None
1537 if candidaterev == p1r:
1549 if candidaterev == p1r:
1538 candidate_type = b"p1"
1550 candidate_type = b"p1"
1539 elif candidaterev == p2r:
1551 elif candidaterev == p2r:
1540 candidate_type = b"p2"
1552 candidate_type = b"p2"
1541 elif self.revlog.issnapshot(candidaterev):
1553 elif self.revlog.issnapshot(candidaterev):
1542 candidate_type = b"snapshot-%d"
1554 candidate_type = b"snapshot-%d"
1543 candidate_type %= self.revlog.snapshotdepth(
1555 candidate_type %= self.revlog.snapshotdepth(
1544 candidaterev
1556 candidaterev
1545 )
1557 )
1546
1558
1547 if candidate_type is not None:
1559 if candidate_type is not None:
1548 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1560 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1549 msg %= candidate_type
1561 msg %= candidate_type
1550 self._write_debug(msg)
1562 self._write_debug(msg)
1551 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1563 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1552 msg %= self.revlog.length(candidaterev)
1564 msg %= self.revlog.length(candidaterev)
1553 self._write_debug(msg)
1565 self._write_debug(msg)
1554 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1566 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1555 msg %= self.revlog.deltaparent(candidaterev)
1567 msg %= self.revlog.deltaparent(candidaterev)
1556 self._write_debug(msg)
1568 self._write_debug(msg)
1557
1569
1558 dbg_try_count += 1
1570 dbg_try_count += 1
1559
1571
1560 if self._debug_search:
1572 if self._debug_search:
1561 delta_start = util.timer()
1573 delta_start = util.timer()
1562 candidatedelta = self._builddeltainfo(
1574 candidatedelta = self._builddeltainfo(
1563 revinfo,
1575 revinfo,
1564 candidaterev,
1576 candidaterev,
1565 target_rev=target_rev,
1577 target_rev=target_rev,
1566 )
1578 )
1567 if self._debug_search:
1579 if self._debug_search:
1568 delta_end = util.timer()
1580 delta_end = util.timer()
1569 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1581 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1570 msg %= delta_end - delta_start
1582 msg %= delta_end - delta_start
1571 self._write_debug(msg)
1583 self._write_debug(msg)
1572 if candidatedelta is not None:
1584 if candidatedelta is not None:
1573 if search.is_good_delta_info(candidatedelta):
1585 if search.is_good_delta_info(candidatedelta):
1574 if self._debug_search:
1586 if self._debug_search:
1575 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1587 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1576 msg %= candidatedelta.deltalen
1588 msg %= candidatedelta.deltalen
1577 self._write_debug(msg)
1589 self._write_debug(msg)
1578 nominateddeltas.append(candidatedelta)
1590 nominateddeltas.append(candidatedelta)
1579 elif self._debug_search:
1591 elif self._debug_search:
1580 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1592 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1581 msg %= candidatedelta.deltalen
1593 msg %= candidatedelta.deltalen
1582 self._write_debug(msg)
1594 self._write_debug(msg)
1583 elif self._debug_search:
1595 elif self._debug_search:
1584 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1596 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1585 self._write_debug(msg)
1597 self._write_debug(msg)
1586 if nominateddeltas:
1598 if nominateddeltas:
1587 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1599 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1588 search.next_group(deltainfo)
1600 search.next_group(deltainfo)
1589
1601
1590 if deltainfo is None:
1602 if deltainfo is None:
1591 dbg_type = b"full"
1603 dbg_type = b"full"
1592 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1604 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1593 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1605 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1594 dbg_type = b"snapshot"
1606 dbg_type = b"snapshot"
1595 else:
1607 else:
1596 dbg_type = b"delta"
1608 dbg_type = b"delta"
1597
1609
1598 if gather_debug:
1610 if gather_debug:
1599 end = util.timer()
1611 end = util.timer()
1600 if dbg_type == b'full':
1612 if dbg_type == b'full':
1601 used_cached = (
1613 used_cached = (
1602 cachedelta is not None
1614 cachedelta is not None
1603 and dbg_try_rounds == 0
1615 and dbg_try_rounds == 0
1604 and dbg_try_count == 0
1616 and dbg_try_count == 0
1605 and cachedelta[0] == nullrev
1617 and cachedelta[0] == nullrev
1606 )
1618 )
1607 else:
1619 else:
1608 used_cached = (
1620 used_cached = (
1609 cachedelta is not None
1621 cachedelta is not None
1610 and dbg_try_rounds == 1
1622 and dbg_try_rounds == 1
1611 and dbg_try_count == 1
1623 and dbg_try_count == 1
1612 and deltainfo.base == cachedelta[0]
1624 and deltainfo.base == cachedelta[0]
1613 )
1625 )
1614 dbg['duration'] = end - start
1626 dbg['duration'] = end - start
1615 dbg[
1627 dbg[
1616 'delta-base'
1628 'delta-base'
1617 ] = deltainfo.base # pytype: disable=attribute-error
1629 ] = deltainfo.base # pytype: disable=attribute-error
1618 dbg['search_round_count'] = dbg_try_rounds
1630 dbg['search_round_count'] = dbg_try_rounds
1619 dbg['using-cached-base'] = used_cached
1631 dbg['using-cached-base'] = used_cached
1620 dbg['delta_try_count'] = dbg_try_count
1632 dbg['delta_try_count'] = dbg_try_count
1621 dbg['type'] = dbg_type
1633 dbg['type'] = dbg_type
1622 if (
1634 if (
1623 deltainfo.snapshotdepth # pytype: disable=attribute-error
1635 deltainfo.snapshotdepth # pytype: disable=attribute-error
1624 is not None
1636 is not None
1625 ):
1637 ):
1626 dbg[
1638 dbg[
1627 'snapshot-depth'
1639 'snapshot-depth'
1628 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1640 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1629 else:
1641 else:
1630 dbg['snapshot-depth'] = -1
1642 dbg['snapshot-depth'] = -1
1631 self._dbg_process_data(dbg)
1643 self._dbg_process_data(dbg)
1632 return deltainfo
1644 return deltainfo
1633
1645
1634 def _one_dbg_data(self):
1646 def _one_dbg_data(self):
1635 dbg = {
1647 dbg = {
1636 'duration': None,
1648 'duration': None,
1637 'revision': None,
1649 'revision': None,
1638 'delta-base': None,
1650 'delta-base': None,
1639 'search_round_count': None,
1651 'search_round_count': None,
1640 'using-cached-base': None,
1652 'using-cached-base': None,
1641 'delta_try_count': None,
1653 'delta_try_count': None,
1642 'type': None,
1654 'type': None,
1643 'p1-chain-len': None,
1655 'p1-chain-len': None,
1644 'p2-chain-len': None,
1656 'p2-chain-len': None,
1645 'snapshot-depth': None,
1657 'snapshot-depth': None,
1646 'target-revlog': None,
1658 'target-revlog': None,
1647 }
1659 }
1648 target_revlog = b"UNKNOWN"
1660 target_revlog = b"UNKNOWN"
1649 target_type = self.revlog.target[0]
1661 target_type = self.revlog.target[0]
1650 target_key = self.revlog.target[1]
1662 target_key = self.revlog.target[1]
1651 if target_type == KIND_CHANGELOG:
1663 if target_type == KIND_CHANGELOG:
1652 target_revlog = b'CHANGELOG:'
1664 target_revlog = b'CHANGELOG:'
1653 elif target_type == KIND_MANIFESTLOG:
1665 elif target_type == KIND_MANIFESTLOG:
1654 target_revlog = b'MANIFESTLOG:'
1666 target_revlog = b'MANIFESTLOG:'
1655 if target_key:
1667 if target_key:
1656 target_revlog += b'%s:' % target_key
1668 target_revlog += b'%s:' % target_key
1657 elif target_type == KIND_FILELOG:
1669 elif target_type == KIND_FILELOG:
1658 target_revlog = b'FILELOG:'
1670 target_revlog = b'FILELOG:'
1659 if target_key:
1671 if target_key:
1660 target_revlog += b'%s:' % target_key
1672 target_revlog += b'%s:' % target_key
1661 dbg['target-revlog'] = target_revlog
1673 dbg['target-revlog'] = target_revlog
1662 return dbg
1674 return dbg
1663
1675
1664 def _dbg_process_data(self, dbg):
1676 def _dbg_process_data(self, dbg):
1665 if self._debug_info is not None:
1677 if self._debug_info is not None:
1666 self._debug_info.append(dbg)
1678 self._debug_info.append(dbg)
1667
1679
1668 if self._write_debug is not None:
1680 if self._write_debug is not None:
1669 msg = (
1681 msg = (
1670 b"DBG-DELTAS:"
1682 b"DBG-DELTAS:"
1671 b" %-12s"
1683 b" %-12s"
1672 b" rev=%d:"
1684 b" rev=%d:"
1673 b" delta-base=%d"
1685 b" delta-base=%d"
1674 b" is-cached=%d"
1686 b" is-cached=%d"
1675 b" - search-rounds=%d"
1687 b" - search-rounds=%d"
1676 b" try-count=%d"
1688 b" try-count=%d"
1677 b" - delta-type=%-6s"
1689 b" - delta-type=%-6s"
1678 b" snap-depth=%d"
1690 b" snap-depth=%d"
1679 b" - p1-chain-length=%d"
1691 b" - p1-chain-length=%d"
1680 b" p2-chain-length=%d"
1692 b" p2-chain-length=%d"
1681 b" - duration=%f"
1693 b" - duration=%f"
1682 b"\n"
1694 b"\n"
1683 )
1695 )
1684 msg %= (
1696 msg %= (
1685 dbg["target-revlog"],
1697 dbg["target-revlog"],
1686 dbg["revision"],
1698 dbg["revision"],
1687 dbg["delta-base"],
1699 dbg["delta-base"],
1688 dbg["using-cached-base"],
1700 dbg["using-cached-base"],
1689 dbg["search_round_count"],
1701 dbg["search_round_count"],
1690 dbg["delta_try_count"],
1702 dbg["delta_try_count"],
1691 dbg["type"],
1703 dbg["type"],
1692 dbg["snapshot-depth"],
1704 dbg["snapshot-depth"],
1693 dbg["p1-chain-len"],
1705 dbg["p1-chain-len"],
1694 dbg["p2-chain-len"],
1706 dbg["p2-chain-len"],
1695 dbg["duration"],
1707 dbg["duration"],
1696 )
1708 )
1697 self._write_debug(msg)
1709 self._write_debug(msg)
1698
1710
1699
1711
1700 def delta_compression(default_compression_header, deltainfo):
1712 def delta_compression(default_compression_header, deltainfo):
1701 """return (COMPRESSION_MODE, deltainfo)
1713 """return (COMPRESSION_MODE, deltainfo)
1702
1714
1703 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1715 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1704 compression.
1716 compression.
1705 """
1717 """
1706 h, d = deltainfo.data
1718 h, d = deltainfo.data
1707 compression_mode = COMP_MODE_INLINE
1719 compression_mode = COMP_MODE_INLINE
1708 if not h and not d:
1720 if not h and not d:
1709 # not data to store at all... declare them uncompressed
1721 # not data to store at all... declare them uncompressed
1710 compression_mode = COMP_MODE_PLAIN
1722 compression_mode = COMP_MODE_PLAIN
1711 elif not h:
1723 elif not h:
1712 t = d[0:1]
1724 t = d[0:1]
1713 if t == b'\0':
1725 if t == b'\0':
1714 compression_mode = COMP_MODE_PLAIN
1726 compression_mode = COMP_MODE_PLAIN
1715 elif t == default_compression_header:
1727 elif t == default_compression_header:
1716 compression_mode = COMP_MODE_DEFAULT
1728 compression_mode = COMP_MODE_DEFAULT
1717 elif h == b'u':
1729 elif h == b'u':
1718 # we have a more efficient way to declare uncompressed
1730 # we have a more efficient way to declare uncompressed
1719 h = b''
1731 h = b''
1720 compression_mode = COMP_MODE_PLAIN
1732 compression_mode = COMP_MODE_PLAIN
1721 deltainfo = drop_u_compression(deltainfo)
1733 deltainfo = drop_u_compression(deltainfo)
1722 return compression_mode, deltainfo
1734 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now