# NOTE(review): the lines below are residue from a web diff viewer, kept as
# comments for provenance — not part of the module source.
#   changeset: delta-find: move tested in the _DeltaSearch.__init__...
#   author: marmoute
#   revision: r52219:01db705b (default)
# revlogdeltas.py - Logic around delta computation for revlog
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""


import collections
import struct

# import stuff from node for others to import from revlog
from ..node import nullrev
from ..i18n import _

from .constants import (
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_FORCE,
    DELTA_BASE_REUSE_NO,
    KIND_CHANGELOG,
    KIND_FILELOG,
    KIND_MANIFESTLOG,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)

from ..thirdparty import attr

from .. import (
    error,
    mdiff,
    util,
)

from . import flagutil

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        # local import to avoid an import cycle with the real revlog module
        from .. import revlog

        # cumulative end offsets; rev N spans data[N-1]:data[N]
        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        """byte offset where revision ``rev`` starts"""
        if rev == nullrev:
            return 0
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        """byte offset just past the end of revision ``rev``"""
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        """stored byte length of revision ``rev``"""
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        """True if ``rev`` was declared a snapshot (nullrev always is)"""
        if rev == nullrev:
            return True
        return rev in self._snapshot
83
83
84
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # prefer the C-index implementation of density slicing when available
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs,
        revlog.data_config.sr_density_threshold,
        revlog.data_config.sr_min_gap_size,
    ):
        # density slicing may still produce over-large chunks; re-slice by size
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read guarantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # whole chunk already fits within the target: nothing to do
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision is always dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # final chunk: everything after the last selected gap
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment occupies no bytes
    if not revs:
        return 0
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # Immutable record describing one candidate delta for a revision.
    # NOTE: attribute order is significant — attr.s generates the
    # positional __init__ from it (see drop_u_compression).

    # distance from the delta base, i.e. amount of revlog data spanned
    distance = attr.ib()
    # length of the (possibly compressed) delta payload
    deltalen = attr.ib()
    # pair (compression-header, delta-bytes)
    data = attr.ib()
    # revision number used as delta base
    base = attr.ib()
    # first revision of the delta chain this delta extends
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth if this delta forms an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
567
567
568
568
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.
    """
    # the b'u' header byte marks "stored uncompressed"; we only strip it
    assert delta.data[0] == b'u', delta.data[0]
    # rebuild the record with the header removed: the payload shrinks by
    # one byte and the compression-header slot becomes empty
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', delta.data[1]),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
585
585
586
586
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    # no delta at all is never a good delta
    if deltainfo is None:
        return False

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
    # so we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        revinfo.cachedelta is None
        or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    config = revlog.delta_config
    textlen = revinfo.textlen

    # resolve the maximum allowed on-disk span for the chain; an unset
    # max_deltachain_span disables the check by matching the distance
    maxdist = config.max_deltachain_span
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, textlen * 4)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if maxdist < deltainfo.distance and not config.sparse_revlog:
        return False

    # Bad delta from new delta size:
    #
    # storing a delta larger than the target text itself is inefficient
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # the sum of all deltas to apply may not exceed K * target text length
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # the number of deltas in the chain may not grow past the limit
    if config.max_chain_len and config.max_chain_len < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # the limit exists to prevent endless chains of intermediate deltas
    # from being created
    if deltainfo.snapshotdepth is not None:
        if (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen:
            return False

    # bad delta if the new intermediate snapshot would be larger than the
    # snapshot it builds upon
    if deltainfo.snapshotdepth:
        if revlog.length(deltainfo.base) < deltainfo.deltalen:
            return False

    return True
670
670
671
671
672 # If a revision's full text is that much bigger than a base candidate full
672 # If a revision's full text is that much bigger than a base candidate full
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
674 # consider these candidates.
674 # consider these candidates.
675 LIMIT_BASE2TEXT = 500
675 LIMIT_BASE2TEXT = 500
676
676
677
677
678 class _DeltaSearch:
678 class _DeltaSearch:
679 """perform the search of a good delta for a single revlog revision
679 """perform the search of a good delta for a single revlog revision
680
680
681 note: some of the deltacomputer.finddeltainfo logic should probably move
681 note: some of the deltacomputer.finddeltainfo logic should probably move
682 here.
682 here.
683 """
683 """
684
684
685 def __init__(
685 def __init__(
686 self,
686 self,
687 revlog,
687 revlog,
688 textlen,
688 textlen,
689 p1,
689 p1,
690 p2,
690 p2,
691 cachedelta,
691 cachedelta,
692 excluded_bases=None,
692 excluded_bases=None,
693 target_rev=None,
693 target_rev=None,
694 snapshot_cache=None,
694 snapshot_cache=None,
695 ):
695 ):
696 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
696 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
697 # so we should never end up asking such question. Adding the assert as
697 # so we should never end up asking such question. Adding the assert as
698 # a safe-guard to detect anything that would be fishy in this regard.
698 # a safe-guard to detect anything that would be fishy in this regard.
699 assert (
699 assert (
700 cachedelta is None
700 cachedelta is None
701 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
701 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
702 or not revlog.delta_config.general_delta
702 or not revlog.delta_config.general_delta
703 )
703 )
704 self.revlog = revlog
704 self.revlog = revlog
705 self.textlen = textlen
705 self.textlen = textlen
706 self.p1 = p1
706 self.p1 = p1
707 self.p2 = p2
707 self.p2 = p2
708 self.cachedelta = cachedelta
708 self.cachedelta = cachedelta
709 self.excluded_bases = excluded_bases
709 self.excluded_bases = excluded_bases
710 if target_rev is None:
710 if target_rev is None:
711 self.target_rev = len(self.revlog)
711 self.target_rev = len(self.revlog)
712 self.target_rev = target_rev
712 self.target_rev = target_rev
713 if snapshot_cache is None:
713 if snapshot_cache is None:
714 # map: base-rev: [snapshot-revs]
714 # map: base-rev: [snapshot-revs]
715 snapshot_cache = SnapshotCache()
715 snapshot_cache = SnapshotCache()
716 self.snapshot_cache = snapshot_cache
716 self.snapshot_cache = snapshot_cache
717
717
718 self.tested = {nullrev}
719
    def candidate_groups(self):
        """Provides group of revision to be tested as delta base

        This top level function focus on emitting groups with unique and
        worthwhile content. See _raw_candidate_groups for details about the
        group order.

        Generator protocol: the caller iterates with ``send(good)`` where
        ``good`` is the base accepted from the previous group (or None);
        the generator emits ``None`` when it is exhausted.
        """
        # should we try to build a delta?
        if not (len(self.revlog) and self.revlog._storedeltachains):
            yield None
            return

        if not self.revlog.delta_config.general_delta:
            # before general delta, there is only one possible delta base
            yield (self.target_rev - 1,)
            yield None
            return

        # bind frequently used methods locally (hot loop below)
        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent
        sparse = self.revlog.delta_config.sparse_revlog
        good = None

        deltas_limit = self.textlen * LIMIT_DELTA2TEXT
        group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size

        tested = self.tested  # prefetch for speed and code compactness
        candidates = self._refined_groups()
        while True:
            # forward the caller's verdict on the previous group to the
            # refinement generator so it can react to it
            temptative = candidates.send(good)
            if temptative is None:
                break
            group = []
            for rev in temptative:
                # skip over empty delta (no need to include them in a chain)
                while not (rev == nullrev or rev in tested or deltalength(rev)):
                    tested.add(rev)
                    rev = deltaparent(rev)
                # no need to try a delta against nullrev, this will be done as
                # a last resort.
                if rev == nullrev:
                    continue
                # filter out revision we tested already
                if rev in tested:
                    continue

                # an higher authority deamed the base unworthy (e.g. censored)
                if (
                    self.excluded_bases is not None
                    and rev in self.excluded_bases
                ):
                    tested.add(rev)
                    continue
                # We are in some recomputation cases and that rev is too high
                # in the revlog
                if self.target_rev is not None and rev >= self.target_rev:
                    tested.add(rev)
                    continue
                # filter out delta base that will never produce good delta
                if deltas_limit < self.revlog.length(rev):
                    tested.add(rev)
                    continue
                # base fulltext much smaller than the target text cannot
                # produce a usable delta (see LIMIT_BASE2TEXT)
                if sparse and self.revlog.rawsize(rev) < (
                    self.textlen // LIMIT_BASE2TEXT
                ):
                    tested.add(rev)
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    tested.add(rev)
                    continue

                # If we reach here, we are about to build and test a delta.
                # The delta building process will compute the chaininfo in all
                # case, since that computation is cached, it is fine to access
                # it here too.
                chainlen, chainsize = self.revlog._chaininfo(rev)
                # if chain will be too long, skip base
                if (
                    self.revlog.delta_config.max_chain_len
                    and chainlen >= self.revlog.delta_config.max_chain_len
                ):
                    tested.add(rev)
                    continue
                # if chain already have too much data, skip base
                if deltas_limit < chainsize:
                    tested.add(rev)
                    continue
                if (
                    sparse
                    and self.revlog.delta_config.upper_bound_comp is not None
                ):
                    maxcomp = self.revlog.delta_config.upper_bound_comp
                    basenotsnap = (self.p1, self.p2, nullrev)
                    if rev not in basenotsnap and self.revlog.issnapshot(rev):
                        snapshotdepth = self.revlog.snapshotdepth(rev)
                        # If text is significantly larger than the base, we can
                        # expect the resulting delta to be proportional to the size
                        # difference
                        revsize = self.revlog.rawsize(rev)
                        rawsizedistance = max(self.textlen - revsize, 0)
                        # use an estimate of the compression upper bound.
                        lowestrealisticdeltalen = rawsizedistance // maxcomp

                        # check the absolute constraint on the delta size
                        snapshotlimit = self.textlen >> snapshotdepth
                        if snapshotlimit < lowestrealisticdeltalen:
                            # delta lower bound is larger than accepted upper
                            # bound
                            tested.add(rev)
                            continue

                        # check the relative constraint on the delta size
                        revlength = self.revlog.length(rev)
                        if revlength < lowestrealisticdeltalen:
                            # delta probable lower bound is larger than target
                            # base
                            tested.add(rev)
                            continue

                group.append(rev)
            if group:
                # When the size of the candidate group is big, it can result in
                # a quite significant performance impact. To reduce this, we
                # can send them in smaller batches until the new batch does not
                # provide any improvements.
                #
                # This might reduce the overall efficiency of the compression
                # in some corner cases, but that should also prevent very
                # pathological cases from being an issue. (eg. 20 000
                # candidates).
                #
                # XXX note that the ordering of the group becomes important as
                # it now impacts the final result. The current order is
                # unprocessed and can be improved.
                if group_chunk_size == 0:
                    tested.update(group)
                    good = yield tuple(group)
                else:
                    prev_good = good
                    for start in range(0, len(group), group_chunk_size):
                        sub_group = group[start : start + group_chunk_size]
                        tested.update(sub_group)
                        good = yield tuple(sub_group)
                        # stop chunking as soon as a batch brings no
                        # improvement over the previous verdict
                        if prev_good == good:
                            break

        yield None
    def _refined_groups(self):
        """Yield candidate-base groups, refining around accepted bases.

        Same ``send(good)`` protocol as ``candidate_groups``: the caller
        sends back the accepted base (or None) after each yielded group,
        and a final ``None`` signals exhaustion.
        """
        good = None
        # First we try to reuse a the delta contained in the bundle. (or from
        # the source revlog)
        #
        # This logic only applies to general delta repositories and can be
        # disabled through configuration. Disabling reuse source delta is
        # useful when we want to make sure we recomputed "optimal" deltas.
        debug_info = None
        if (
            self.cachedelta is not None
            and self.cachedelta[2] > DELTA_BASE_REUSE_NO
        ):
            # Assume what we received from the server is a good choice
            # build delta will reuse the cache
            if debug_info is not None:
                debug_info['cached-delta.tested'] += 1
            good = yield (self.cachedelta[0],)
            if good is not None:
                if debug_info is not None:
                    debug_info['cached-delta.accepted'] += 1
                yield None
                return
        # fall back to the theoretical candidates, stopping at the first
        # group that produced an accepted base
        groups = self._raw_groups()
        for candidates in groups:
            good = yield candidates
            if good is not None:
                break

        # If sparse revlog is enabled, we can try to refine the available
        # deltas
        if not self.revlog.delta_config.sparse_revlog:
            yield None
            return

        # if we have a refinable value, try to refine it
        if (
            good is not None
            and good not in (self.p1, self.p2)
            and self.revlog.issnapshot(good)
        ):
            # refine snapshot down
            previous = None
            while previous != good:
                previous = good
                base = self.revlog.deltaparent(good)
                if base == nullrev:
                    break
                good = yield (base,)
            # refine snapshot up
            if not self.snapshot_cache.snapshots:
                self.snapshot_cache.update(self.revlog, good + 1)
            previous = None
            while good != previous:
                previous = good
                # try the children snapshots of the accepted base, until
                # the caller stops accepting a new one
                children = tuple(
                    sorted(c for c in self.snapshot_cache.snapshots[good])
                )
                good = yield children

        if debug_info is not None:
            if good is None:
                debug_info['no-solution'] += 1

        yield None
933 def _raw_groups(self):
935 def _raw_groups(self):
934 """Provides group of revision to be tested as delta base
936 """Provides group of revision to be tested as delta base
935
937
936 This lower level function focus on emitting delta theorically
938 This lower level function focus on emitting delta theorically
937 interresting without looking it any practical details.
939 interresting without looking it any practical details.
938
940
939 The group order aims at providing fast or small candidates first.
941 The group order aims at providing fast or small candidates first.
940 """
942 """
941 # Why search for delta base if we cannot use a delta base ?
943 # Why search for delta base if we cannot use a delta base ?
942 assert self.revlog.delta_config.general_delta
944 assert self.revlog.delta_config.general_delta
943 # also see issue6056
945 # also see issue6056
944 sparse = self.revlog.delta_config.sparse_revlog
946 sparse = self.revlog.delta_config.sparse_revlog
945 curr = len(self.revlog)
947 curr = len(self.revlog)
946 prev = curr - 1
948 prev = curr - 1
947 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
949 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
948
950
949 # exclude already lazy tested base if any
951 # exclude already lazy tested base if any
950 parents = [p for p in (self.p1, self.p2) if p != nullrev]
952 parents = [p for p in (self.p1, self.p2) if p != nullrev]
951
953
952 if (
954 if (
953 not self.revlog.delta_config.delta_both_parents
955 not self.revlog.delta_config.delta_both_parents
954 and len(parents) == 2
956 and len(parents) == 2
955 ):
957 ):
956 parents.sort()
958 parents.sort()
957 # To minimize the chance of having to build a fulltext,
959 # To minimize the chance of having to build a fulltext,
958 # pick first whichever parent is closest to us (max rev)
960 # pick first whichever parent is closest to us (max rev)
959 yield (parents[1],)
961 yield (parents[1],)
960 # then the other one (min rev) if the first did not fit
962 # then the other one (min rev) if the first did not fit
961 yield (parents[0],)
963 yield (parents[0],)
962 elif len(parents) > 0:
964 elif len(parents) > 0:
963 # Test all parents (1 or 2), and keep the best candidate
965 # Test all parents (1 or 2), and keep the best candidate
964 yield parents
966 yield parents
965
967
966 if sparse and parents:
968 if sparse and parents:
967 # See if we can use an existing snapshot in the parent chains to
969 # See if we can use an existing snapshot in the parent chains to
968 # use as a base for a new intermediate-snapshot
970 # use as a base for a new intermediate-snapshot
969 #
971 #
970 # search for snapshot in parents delta chain map: snapshot-level:
972 # search for snapshot in parents delta chain map: snapshot-level:
971 # snapshot-rev
973 # snapshot-rev
972 parents_snaps = collections.defaultdict(set)
974 parents_snaps = collections.defaultdict(set)
973 candidate_chains = [deltachain(p) for p in parents]
975 candidate_chains = [deltachain(p) for p in parents]
974 for chain in candidate_chains:
976 for chain in candidate_chains:
975 for idx, s in enumerate(chain):
977 for idx, s in enumerate(chain):
976 if not self.revlog.issnapshot(s):
978 if not self.revlog.issnapshot(s):
977 break
979 break
978 parents_snaps[idx].add(s)
980 parents_snaps[idx].add(s)
979 snapfloor = min(parents_snaps[0]) + 1
981 snapfloor = min(parents_snaps[0]) + 1
980 self.snapshot_cache.update(self.revlog, snapfloor)
982 self.snapshot_cache.update(self.revlog, snapfloor)
981 # search for the highest "unrelated" revision
983 # search for the highest "unrelated" revision
982 #
984 #
983 # Adding snapshots used by "unrelated" revision increase the odd we
985 # Adding snapshots used by "unrelated" revision increase the odd we
984 # reuse an independant, yet better snapshot chain.
986 # reuse an independant, yet better snapshot chain.
985 #
987 #
986 # XXX instead of building a set of revisions, we could lazily
988 # XXX instead of building a set of revisions, we could lazily
987 # enumerate over the chains. That would be more efficient, however
989 # enumerate over the chains. That would be more efficient, however
988 # we stick to simple code for now.
990 # we stick to simple code for now.
989 all_revs = set()
991 all_revs = set()
990 for chain in candidate_chains:
992 for chain in candidate_chains:
991 all_revs.update(chain)
993 all_revs.update(chain)
992 other = None
994 other = None
993 for r in self.revlog.revs(prev, snapfloor):
995 for r in self.revlog.revs(prev, snapfloor):
994 if r not in all_revs:
996 if r not in all_revs:
995 other = r
997 other = r
996 break
998 break
997 if other is not None:
999 if other is not None:
998 # To avoid unfair competition, we won't use unrelated
1000 # To avoid unfair competition, we won't use unrelated
999 # intermediate snapshot that are deeper than the ones from the
1001 # intermediate snapshot that are deeper than the ones from the
1000 # parent delta chain.
1002 # parent delta chain.
1001 max_depth = max(parents_snaps.keys())
1003 max_depth = max(parents_snaps.keys())
1002 chain = deltachain(other)
1004 chain = deltachain(other)
1003 for depth, s in enumerate(chain):
1005 for depth, s in enumerate(chain):
1004 if s < snapfloor:
1006 if s < snapfloor:
1005 continue
1007 continue
1006 if max_depth < depth:
1008 if max_depth < depth:
1007 break
1009 break
1008 if not self.revlog.issnapshot(s):
1010 if not self.revlog.issnapshot(s):
1009 break
1011 break
1010 parents_snaps[depth].add(s)
1012 parents_snaps[depth].add(s)
1011 # Test them as possible intermediate snapshot base We test them
1013 # Test them as possible intermediate snapshot base We test them
1012 # from highest to lowest level. High level one are more likely to
1014 # from highest to lowest level. High level one are more likely to
1013 # result in small delta
1015 # result in small delta
1014 floor = None
1016 floor = None
1015 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1017 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1016 siblings = set()
1018 siblings = set()
1017 for s in snaps:
1019 for s in snaps:
1018 siblings.update(self.snapshot_cache.snapshots[s])
1020 siblings.update(self.snapshot_cache.snapshots[s])
1019 # Before considering making a new intermediate snapshot, we
1021 # Before considering making a new intermediate snapshot, we
1020 # check if an existing snapshot, children of base we consider,
1022 # check if an existing snapshot, children of base we consider,
1021 # would be suitable.
1023 # would be suitable.
1022 #
1024 #
1023 # It give a change to reuse a delta chain "unrelated" to the
1025 # It give a change to reuse a delta chain "unrelated" to the
1024 # current revision instead of starting our own. Without such
1026 # current revision instead of starting our own. Without such
1025 # re-use, topological branches would keep reopening new chains.
1027 # re-use, topological branches would keep reopening new chains.
1026 # Creating more and more snapshot as the repository grow.
1028 # Creating more and more snapshot as the repository grow.
1027
1029
1028 if floor is not None:
1030 if floor is not None:
1029 # We only do this for siblings created after the one in our
1031 # We only do this for siblings created after the one in our
1030 # parent's delta chain. Those created before has less
1032 # parent's delta chain. Those created before has less
1031 # chances to be valid base since our ancestors had to
1033 # chances to be valid base since our ancestors had to
1032 # create a new snapshot.
1034 # create a new snapshot.
1033 siblings = [r for r in siblings if floor < r]
1035 siblings = [r for r in siblings if floor < r]
1034 yield tuple(sorted(siblings))
1036 yield tuple(sorted(siblings))
1035 # then test the base from our parent's delta chain.
1037 # then test the base from our parent's delta chain.
1036 yield tuple(sorted(snaps))
1038 yield tuple(sorted(snaps))
1037 floor = min(snaps)
1039 floor = min(snaps)
1038 # No suitable base found in the parent chain, search if any full
1040 # No suitable base found in the parent chain, search if any full
1039 # snapshots emitted since parent's base would be a suitable base
1041 # snapshots emitted since parent's base would be a suitable base
1040 # for an intermediate snapshot.
1042 # for an intermediate snapshot.
1041 #
1043 #
1042 # It give a chance to reuse a delta chain unrelated to the current
1044 # It give a chance to reuse a delta chain unrelated to the current
1043 # revisions instead of starting our own. Without such re-use,
1045 # revisions instead of starting our own. Without such re-use,
1044 # topological branches would keep reopening new full chains.
1046 # topological branches would keep reopening new full chains.
1045 # Creating more and more snapshot as the repository grow.
1047 # Creating more and more snapshot as the repository grow.
1046 full = [
1048 full = [
1047 r
1049 r
1048 for r in self.snapshot_cache.snapshots[nullrev]
1050 for r in self.snapshot_cache.snapshots[nullrev]
1049 if snapfloor <= r
1051 if snapfloor <= r
1050 ]
1052 ]
1051 yield tuple(sorted(full))
1053 yield tuple(sorted(full))
1052
1054
1053 if not sparse:
1055 if not sparse:
1054 # other approach failed try against prev to hopefully save us a
1056 # other approach failed try against prev to hopefully save us a
1055 # fulltext.
1057 # fulltext.
1056 yield (prev,)
1058 yield (prev,)
1057
1059
1058
1060
class SnapshotCache:
    """Cache of the snapshot revisions of a revlog, keyed by delta parent.

    ``snapshots`` maps a base revision to the set of snapshot revisions whose
    delta parent is that base (full snapshots are keyed under ``nullrev``).

    ``_start_rev``/``_end_rev`` record the inclusive revision range that has
    already been scanned, so repeated ``update`` calls only scan revisions
    not covered yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        self.snapshots = collections.defaultdict(set)
        # bounds of the already-scanned range; both None until first update
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        nb_revs = len(revlog)
        end_rev = nb_revs - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # the requested range is not fully covered: scan only the
            # missing portion(s) on either side of the known range
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # the caller must only hand us ranges disjoint from the known one
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        if hasattr(revlog.index, 'findsnapshots'):
            # fast path: let the (native) index gather snapshots itself
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1112
1114
1113
1115
class deltacomputer:
    """object capable of computing delta and finding delta for multiple revision

    This object is meant to compute and find multiple delta applied to the same
    revlog.
    """
1120
1122
1121 def __init__(
1123 def __init__(
1122 self,
1124 self,
1123 revlog,
1125 revlog,
1124 write_debug=None,
1126 write_debug=None,
1125 debug_search=False,
1127 debug_search=False,
1126 debug_info=None,
1128 debug_info=None,
1127 ):
1129 ):
1128 self.revlog = revlog
1130 self.revlog = revlog
1129 self._write_debug = write_debug
1131 self._write_debug = write_debug
1130 if write_debug is None:
1132 if write_debug is None:
1131 self._debug_search = False
1133 self._debug_search = False
1132 else:
1134 else:
1133 self._debug_search = debug_search
1135 self._debug_search = debug_search
1134 self._debug_info = debug_info
1136 self._debug_info = debug_info
1135 self._snapshot_cache = SnapshotCache()
1137 self._snapshot_cache = SnapshotCache()
1136
1138
1137 @property
1139 @property
1138 def _gather_debug(self):
1140 def _gather_debug(self):
1139 return self._write_debug is not None or self._debug_info is not None
1141 return self._write_debug is not None or self._debug_info is not None
1140
1142
1141 def buildtext(self, revinfo):
1143 def buildtext(self, revinfo):
1142 """Builds a fulltext version of a revision
1144 """Builds a fulltext version of a revision
1143
1145
1144 revinfo: revisioninfo instance that contains all needed info
1146 revinfo: revisioninfo instance that contains all needed info
1145 """
1147 """
1146 btext = revinfo.btext
1148 btext = revinfo.btext
1147 if btext[0] is not None:
1149 if btext[0] is not None:
1148 return btext[0]
1150 return btext[0]
1149
1151
1150 revlog = self.revlog
1152 revlog = self.revlog
1151 cachedelta = revinfo.cachedelta
1153 cachedelta = revinfo.cachedelta
1152 baserev = cachedelta[0]
1154 baserev = cachedelta[0]
1153 delta = cachedelta[1]
1155 delta = cachedelta[1]
1154
1156
1155 fulltext = btext[0] = _textfromdelta(
1157 fulltext = btext[0] = _textfromdelta(
1156 revlog,
1158 revlog,
1157 baserev,
1159 baserev,
1158 delta,
1160 delta,
1159 revinfo.p1,
1161 revinfo.p1,
1160 revinfo.p2,
1162 revinfo.p2,
1161 revinfo.flags,
1163 revinfo.flags,
1162 revinfo.node,
1164 revinfo.node,
1163 )
1165 )
1164 return fulltext
1166 return fulltext
1165
1167
1166 def _builddeltadiff(self, base, revinfo):
1168 def _builddeltadiff(self, base, revinfo):
1167 revlog = self.revlog
1169 revlog = self.revlog
1168 t = self.buildtext(revinfo)
1170 t = self.buildtext(revinfo)
1169 if revlog.iscensored(base):
1171 if revlog.iscensored(base):
1170 # deltas based on a censored revision must replace the
1172 # deltas based on a censored revision must replace the
1171 # full content in one patch, so delta works everywhere
1173 # full content in one patch, so delta works everywhere
1172 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1174 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1173 delta = header + t
1175 delta = header + t
1174 else:
1176 else:
1175 ptext = revlog.rawdata(base)
1177 ptext = revlog.rawdata(base)
1176 delta = mdiff.textdiff(ptext, t)
1178 delta = mdiff.textdiff(ptext, t)
1177
1179
1178 return delta
1180 return delta
1179
1181
1180 def _builddeltainfo(self, revinfo, base, target_rev=None):
1182 def _builddeltainfo(self, revinfo, base, target_rev=None):
1181 # can we use the cached delta?
1183 # can we use the cached delta?
1182 revlog = self.revlog
1184 revlog = self.revlog
1183 chainbase = revlog.chainbase(base)
1185 chainbase = revlog.chainbase(base)
1184 if revlog.delta_config.general_delta:
1186 if revlog.delta_config.general_delta:
1185 deltabase = base
1187 deltabase = base
1186 else:
1188 else:
1187 if target_rev is not None and base != target_rev - 1:
1189 if target_rev is not None and base != target_rev - 1:
1188 msg = (
1190 msg = (
1189 b'general delta cannot use delta for something else '
1191 b'general delta cannot use delta for something else '
1190 b'than `prev`: %d<-%d'
1192 b'than `prev`: %d<-%d'
1191 )
1193 )
1192 msg %= (base, target_rev)
1194 msg %= (base, target_rev)
1193 raise error.ProgrammingError(msg)
1195 raise error.ProgrammingError(msg)
1194 deltabase = chainbase
1196 deltabase = chainbase
1195 snapshotdepth = None
1197 snapshotdepth = None
1196 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1198 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1197 snapshotdepth = 0
1199 snapshotdepth = 0
1198 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1200 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1199 # A delta chain should always be one full snapshot,
1201 # A delta chain should always be one full snapshot,
1200 # zero or more semi-snapshots, and zero or more deltas
1202 # zero or more semi-snapshots, and zero or more deltas
1201 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1203 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1202 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1204 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1203 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1205 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1204 delta = None
1206 delta = None
1205 if revinfo.cachedelta:
1207 if revinfo.cachedelta:
1206 cachebase = revinfo.cachedelta[0]
1208 cachebase = revinfo.cachedelta[0]
1207 # check if the diff still apply
1209 # check if the diff still apply
1208 currentbase = cachebase
1210 currentbase = cachebase
1209 while (
1211 while (
1210 currentbase != nullrev
1212 currentbase != nullrev
1211 and currentbase != base
1213 and currentbase != base
1212 and self.revlog.length(currentbase) == 0
1214 and self.revlog.length(currentbase) == 0
1213 ):
1215 ):
1214 currentbase = self.revlog.deltaparent(currentbase)
1216 currentbase = self.revlog.deltaparent(currentbase)
1215 if self.revlog.delta_config.lazy_delta and currentbase == base:
1217 if self.revlog.delta_config.lazy_delta and currentbase == base:
1216 delta = revinfo.cachedelta[1]
1218 delta = revinfo.cachedelta[1]
1217 if delta is None:
1219 if delta is None:
1218 delta = self._builddeltadiff(base, revinfo)
1220 delta = self._builddeltadiff(base, revinfo)
1219 if self._debug_search:
1221 if self._debug_search:
1220 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1222 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1221 msg %= len(delta)
1223 msg %= len(delta)
1222 self._write_debug(msg)
1224 self._write_debug(msg)
1223 # snapshotdept need to be neither None nor 0 level snapshot
1225 # snapshotdept need to be neither None nor 0 level snapshot
1224 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1226 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1225 lowestrealisticdeltalen = (
1227 lowestrealisticdeltalen = (
1226 len(delta) // revlog.delta_config.upper_bound_comp
1228 len(delta) // revlog.delta_config.upper_bound_comp
1227 )
1229 )
1228 snapshotlimit = revinfo.textlen >> snapshotdepth
1230 snapshotlimit = revinfo.textlen >> snapshotdepth
1229 if self._debug_search:
1231 if self._debug_search:
1230 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1232 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1231 msg %= lowestrealisticdeltalen
1233 msg %= lowestrealisticdeltalen
1232 self._write_debug(msg)
1234 self._write_debug(msg)
1233 if snapshotlimit < lowestrealisticdeltalen:
1235 if snapshotlimit < lowestrealisticdeltalen:
1234 if self._debug_search:
1236 if self._debug_search:
1235 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1237 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1236 self._write_debug(msg)
1238 self._write_debug(msg)
1237 return None
1239 return None
1238 if revlog.length(base) < lowestrealisticdeltalen:
1240 if revlog.length(base) < lowestrealisticdeltalen:
1239 if self._debug_search:
1241 if self._debug_search:
1240 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1242 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1241 self._write_debug(msg)
1243 self._write_debug(msg)
1242 return None
1244 return None
1243 header, data = revlog._inner.compress(delta)
1245 header, data = revlog._inner.compress(delta)
1244 deltalen = len(header) + len(data)
1246 deltalen = len(header) + len(data)
1245 offset = revlog.end(len(revlog) - 1)
1247 offset = revlog.end(len(revlog) - 1)
1246 dist = deltalen + offset - revlog.start(chainbase)
1248 dist = deltalen + offset - revlog.start(chainbase)
1247 chainlen, compresseddeltalen = revlog._chaininfo(base)
1249 chainlen, compresseddeltalen = revlog._chaininfo(base)
1248 chainlen += 1
1250 chainlen += 1
1249 compresseddeltalen += deltalen
1251 compresseddeltalen += deltalen
1250
1252
1251 return _deltainfo(
1253 return _deltainfo(
1252 dist,
1254 dist,
1253 deltalen,
1255 deltalen,
1254 (header, data),
1256 (header, data),
1255 deltabase,
1257 deltabase,
1256 chainbase,
1258 chainbase,
1257 chainlen,
1259 chainlen,
1258 compresseddeltalen,
1260 compresseddeltalen,
1259 snapshotdepth,
1261 snapshotdepth,
1260 )
1262 )
1261
1263
1262 def _fullsnapshotinfo(self, revinfo, curr):
1264 def _fullsnapshotinfo(self, revinfo, curr):
1263 rawtext = self.buildtext(revinfo)
1265 rawtext = self.buildtext(revinfo)
1264 data = self.revlog._inner.compress(rawtext)
1266 data = self.revlog._inner.compress(rawtext)
1265 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1267 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1266 deltabase = chainbase = curr
1268 deltabase = chainbase = curr
1267 snapshotdepth = 0
1269 snapshotdepth = 0
1268 chainlen = 1
1270 chainlen = 1
1269
1271
1270 return _deltainfo(
1272 return _deltainfo(
1271 dist,
1273 dist,
1272 deltalen,
1274 deltalen,
1273 data,
1275 data,
1274 deltabase,
1276 deltabase,
1275 chainbase,
1277 chainbase,
1276 chainlen,
1278 chainlen,
1277 compresseddeltalen,
1279 compresseddeltalen,
1278 snapshotdepth,
1280 snapshotdepth,
1279 )
1281 )
1280
1282
1281 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1283 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1282 """Find an acceptable delta against a candidate revision
1284 """Find an acceptable delta against a candidate revision
1283
1285
1284 revinfo: information about the revision (instance of _revisioninfo)
1286 revinfo: information about the revision (instance of _revisioninfo)
1285
1287
1286 Returns the first acceptable candidate revision, as ordered by
1288 Returns the first acceptable candidate revision, as ordered by
1287 _candidategroups
1289 _candidategroups
1288
1290
1289 If no suitable deltabase is found, we return delta info for a full
1291 If no suitable deltabase is found, we return delta info for a full
1290 snapshot.
1292 snapshot.
1291
1293
1292 `excluded_bases` is an optional set of revision that cannot be used as
1294 `excluded_bases` is an optional set of revision that cannot be used as
1293 a delta base. Use this to recompute delta suitable in censor or strip
1295 a delta base. Use this to recompute delta suitable in censor or strip
1294 context.
1296 context.
1295 """
1297 """
1296 if target_rev is None:
1298 if target_rev is None:
1297 target_rev = len(self.revlog)
1299 target_rev = len(self.revlog)
1298
1300
1299 gather_debug = self._gather_debug
1301 gather_debug = self._gather_debug
1300 cachedelta = revinfo.cachedelta
1302 cachedelta = revinfo.cachedelta
1301 revlog = self.revlog
1303 revlog = self.revlog
1302 p1r = p2r = None
1304 p1r = p2r = None
1303
1305
1304 if excluded_bases is None:
1306 if excluded_bases is None:
1305 excluded_bases = set()
1307 excluded_bases = set()
1306
1308
1307 if gather_debug:
1309 if gather_debug:
1308 start = util.timer()
1310 start = util.timer()
1309 dbg = self._one_dbg_data()
1311 dbg = self._one_dbg_data()
1310 dbg['revision'] = target_rev
1312 dbg['revision'] = target_rev
1311 target_revlog = b"UNKNOWN"
1313 target_revlog = b"UNKNOWN"
1312 target_type = self.revlog.target[0]
1314 target_type = self.revlog.target[0]
1313 target_key = self.revlog.target[1]
1315 target_key = self.revlog.target[1]
1314 if target_type == KIND_CHANGELOG:
1316 if target_type == KIND_CHANGELOG:
1315 target_revlog = b'CHANGELOG:'
1317 target_revlog = b'CHANGELOG:'
1316 elif target_type == KIND_MANIFESTLOG:
1318 elif target_type == KIND_MANIFESTLOG:
1317 target_revlog = b'MANIFESTLOG:'
1319 target_revlog = b'MANIFESTLOG:'
1318 if target_key:
1320 if target_key:
1319 target_revlog += b'%s:' % target_key
1321 target_revlog += b'%s:' % target_key
1320 elif target_type == KIND_FILELOG:
1322 elif target_type == KIND_FILELOG:
1321 target_revlog = b'FILELOG:'
1323 target_revlog = b'FILELOG:'
1322 if target_key:
1324 if target_key:
1323 target_revlog += b'%s:' % target_key
1325 target_revlog += b'%s:' % target_key
1324 dbg['target-revlog'] = target_revlog
1326 dbg['target-revlog'] = target_revlog
1325 p1r = revlog.rev(revinfo.p1)
1327 p1r = revlog.rev(revinfo.p1)
1326 p2r = revlog.rev(revinfo.p2)
1328 p2r = revlog.rev(revinfo.p2)
1327 if p1r != nullrev:
1329 if p1r != nullrev:
1328 p1_chain_len = revlog._chaininfo(p1r)[0]
1330 p1_chain_len = revlog._chaininfo(p1r)[0]
1329 else:
1331 else:
1330 p1_chain_len = -1
1332 p1_chain_len = -1
1331 if p2r != nullrev:
1333 if p2r != nullrev:
1332 p2_chain_len = revlog._chaininfo(p2r)[0]
1334 p2_chain_len = revlog._chaininfo(p2r)[0]
1333 else:
1335 else:
1334 p2_chain_len = -1
1336 p2_chain_len = -1
1335 dbg['p1-chain-len'] = p1_chain_len
1337 dbg['p1-chain-len'] = p1_chain_len
1336 dbg['p2-chain-len'] = p2_chain_len
1338 dbg['p2-chain-len'] = p2_chain_len
1337
1339
1338 # 1) if the revision is empty, no amount of delta can beat it
1340 # 1) if the revision is empty, no amount of delta can beat it
1339 #
1341 #
1340 # 2) no delta for flag processor revision (see "candelta" for why)
1342 # 2) no delta for flag processor revision (see "candelta" for why)
1341 # not calling candelta since only one revision needs test, also to
1343 # not calling candelta since only one revision needs test, also to
1342 # avoid overhead fetching flags again.
1344 # avoid overhead fetching flags again.
1343 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1345 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1344 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1346 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1345 if gather_debug:
1347 if gather_debug:
1346 end = util.timer()
1348 end = util.timer()
1347 dbg['duration'] = end - start
1349 dbg['duration'] = end - start
1348 dbg[
1350 dbg[
1349 'delta-base'
1351 'delta-base'
1350 ] = deltainfo.base # pytype: disable=attribute-error
1352 ] = deltainfo.base # pytype: disable=attribute-error
1351 dbg['search_round_count'] = 0
1353 dbg['search_round_count'] = 0
1352 dbg['using-cached-base'] = False
1354 dbg['using-cached-base'] = False
1353 dbg['delta_try_count'] = 0
1355 dbg['delta_try_count'] = 0
1354 dbg['type'] = b"full"
1356 dbg['type'] = b"full"
1355 dbg['snapshot-depth'] = 0
1357 dbg['snapshot-depth'] = 0
1356 self._dbg_process_data(dbg)
1358 self._dbg_process_data(dbg)
1357 return deltainfo
1359 return deltainfo
1358
1360
1359 deltainfo = None
1361 deltainfo = None
1360
1362
1361 # If this source delta are to be forcibly reuse, let us comply early.
1363 # If this source delta are to be forcibly reuse, let us comply early.
1362 if (
1364 if (
1363 revlog.delta_config.general_delta
1365 revlog.delta_config.general_delta
1364 and revinfo.cachedelta is not None
1366 and revinfo.cachedelta is not None
1365 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1367 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1366 ):
1368 ):
1367 base = revinfo.cachedelta[0]
1369 base = revinfo.cachedelta[0]
1368 if base == nullrev:
1370 if base == nullrev:
1369 dbg_type = b"full"
1371 dbg_type = b"full"
1370 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1372 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1371 if gather_debug:
1373 if gather_debug:
1372 snapshotdepth = 0
1374 snapshotdepth = 0
1373 elif base not in excluded_bases:
1375 elif base not in excluded_bases:
1374 delta = revinfo.cachedelta[1]
1376 delta = revinfo.cachedelta[1]
1375 header, data = revlog.compress(delta)
1377 header, data = revlog.compress(delta)
1376 deltalen = len(header) + len(data)
1378 deltalen = len(header) + len(data)
1377 if gather_debug:
1379 if gather_debug:
1378 offset = revlog.end(len(revlog) - 1)
1380 offset = revlog.end(len(revlog) - 1)
1379 chainbase = revlog.chainbase(base)
1381 chainbase = revlog.chainbase(base)
1380 distance = deltalen + offset - revlog.start(chainbase)
1382 distance = deltalen + offset - revlog.start(chainbase)
1381 chainlen, compresseddeltalen = revlog._chaininfo(base)
1383 chainlen, compresseddeltalen = revlog._chaininfo(base)
1382 chainlen += 1
1384 chainlen += 1
1383 compresseddeltalen += deltalen
1385 compresseddeltalen += deltalen
1384 if base == p1r or base == p2r:
1386 if base == p1r or base == p2r:
1385 dbg_type = b"delta"
1387 dbg_type = b"delta"
1386 snapshotdepth = None
1388 snapshotdepth = None
1387 elif not revlog.issnapshot(base):
1389 elif not revlog.issnapshot(base):
1388 snapshotdepth = None
1390 snapshotdepth = None
1389 else:
1391 else:
1390 dbg_type = b"snapshot"
1392 dbg_type = b"snapshot"
1391 snapshotdepth = revlog.snapshotdepth(base) + 1
1393 snapshotdepth = revlog.snapshotdepth(base) + 1
1392 else:
1394 else:
1393 distance = None
1395 distance = None
1394 chainbase = None
1396 chainbase = None
1395 chainlen = None
1397 chainlen = None
1396 compresseddeltalen = None
1398 compresseddeltalen = None
1397 snapshotdepth = None
1399 snapshotdepth = None
1398 deltainfo = _deltainfo(
1400 deltainfo = _deltainfo(
1399 distance=distance,
1401 distance=distance,
1400 deltalen=deltalen,
1402 deltalen=deltalen,
1401 data=(header, data),
1403 data=(header, data),
1402 base=base,
1404 base=base,
1403 chainbase=chainbase,
1405 chainbase=chainbase,
1404 chainlen=chainlen,
1406 chainlen=chainlen,
1405 compresseddeltalen=compresseddeltalen,
1407 compresseddeltalen=compresseddeltalen,
1406 snapshotdepth=snapshotdepth,
1408 snapshotdepth=snapshotdepth,
1407 )
1409 )
1408
1410
1409 if deltainfo is not None:
1411 if deltainfo is not None:
1410 if gather_debug:
1412 if gather_debug:
1411 end = util.timer()
1413 end = util.timer()
1412 dbg['duration'] = end - start
1414 dbg['duration'] = end - start
1413 dbg[
1415 dbg[
1414 'delta-base'
1416 'delta-base'
1415 ] = deltainfo.base # pytype: disable=attribute-error
1417 ] = deltainfo.base # pytype: disable=attribute-error
1416 dbg['search_round_count'] = 0
1418 dbg['search_round_count'] = 0
1417 dbg['using-cached-base'] = True
1419 dbg['using-cached-base'] = True
1418 dbg['delta_try_count'] = 0
1420 dbg['delta_try_count'] = 0
1419 dbg['type'] = b"full"
1421 dbg['type'] = b"full"
1420 if snapshotdepth is None:
1422 if snapshotdepth is None:
1421 dbg['snapshot-depth'] = 0
1423 dbg['snapshot-depth'] = 0
1422 else:
1424 else:
1423 dbg['snapshot-depth'] = snapshotdepth
1425 dbg['snapshot-depth'] = snapshotdepth
1424 self._dbg_process_data(dbg)
1426 self._dbg_process_data(dbg)
1425 return deltainfo
1427 return deltainfo
1426
1428
1427 # count the number of different delta we tried (for debug purpose)
1429 # count the number of different delta we tried (for debug purpose)
1428 dbg_try_count = 0
1430 dbg_try_count = 0
1429 # count the number of "search round" we did. (for debug purpose)
1431 # count the number of "search round" we did. (for debug purpose)
1430 dbg_try_rounds = 0
1432 dbg_try_rounds = 0
1431 dbg_type = b'unknown'
1433 dbg_type = b'unknown'
1432
1434
1433 if p1r is None:
1435 if p1r is None:
1434 p1r = revlog.rev(revinfo.p1)
1436 p1r = revlog.rev(revinfo.p1)
1435 p2r = revlog.rev(revinfo.p2)
1437 p2r = revlog.rev(revinfo.p2)
1436
1438
1437 if self._debug_search:
1439 if self._debug_search:
1438 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1440 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1439 msg %= target_rev
1441 msg %= target_rev
1440 self._write_debug(msg)
1442 self._write_debug(msg)
1441
1443
1442 search = _DeltaSearch(
1444 search = _DeltaSearch(
1443 self.revlog,
1445 self.revlog,
1444 revinfo.textlen,
1446 revinfo.textlen,
1445 p1r,
1447 p1r,
1446 p2r,
1448 p2r,
1447 cachedelta,
1449 cachedelta,
1448 excluded_bases,
1450 excluded_bases,
1449 target_rev,
1451 target_rev,
1450 snapshot_cache=self._snapshot_cache,
1452 snapshot_cache=self._snapshot_cache,
1451 )
1453 )
1452
1454
1453 groups = search.candidate_groups()
1455 groups = search.candidate_groups()
1454 candidaterevs = next(groups)
1456 candidaterevs = next(groups)
1455 while candidaterevs is not None:
1457 while candidaterevs is not None:
1456 dbg_try_rounds += 1
1458 dbg_try_rounds += 1
1457 if self._debug_search:
1459 if self._debug_search:
1458 prev = None
1460 prev = None
1459 if deltainfo is not None:
1461 if deltainfo is not None:
1460 prev = deltainfo.base
1462 prev = deltainfo.base
1461
1463
1462 if (
1464 if (
1463 cachedelta is not None
1465 cachedelta is not None
1464 and len(candidaterevs) == 1
1466 and len(candidaterevs) == 1
1465 and cachedelta[0] in candidaterevs
1467 and cachedelta[0] in candidaterevs
1466 ):
1468 ):
1467 round_type = b"cached-delta"
1469 round_type = b"cached-delta"
1468 elif p1r in candidaterevs or p2r in candidaterevs:
1470 elif p1r in candidaterevs or p2r in candidaterevs:
1469 round_type = b"parents"
1471 round_type = b"parents"
1470 elif prev is not None and all(c < prev for c in candidaterevs):
1472 elif prev is not None and all(c < prev for c in candidaterevs):
1471 round_type = b"refine-down"
1473 round_type = b"refine-down"
1472 elif prev is not None and all(c > prev for c in candidaterevs):
1474 elif prev is not None and all(c > prev for c in candidaterevs):
1473 round_type = b"refine-up"
1475 round_type = b"refine-up"
1474 else:
1476 else:
1475 round_type = b"search-down"
1477 round_type = b"search-down"
1476 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1478 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1477 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1479 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1478 self._write_debug(msg)
1480 self._write_debug(msg)
1479 nominateddeltas = []
1481 nominateddeltas = []
1480 if deltainfo is not None:
1482 if deltainfo is not None:
1481 if self._debug_search:
1483 if self._debug_search:
1482 msg = (
1484 msg = (
1483 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1485 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1484 )
1486 )
1485 msg %= (deltainfo.base, deltainfo.deltalen)
1487 msg %= (deltainfo.base, deltainfo.deltalen)
1486 self._write_debug(msg)
1488 self._write_debug(msg)
1487 # if we already found a good delta,
1489 # if we already found a good delta,
1488 # challenge it against refined candidates
1490 # challenge it against refined candidates
1489 nominateddeltas.append(deltainfo)
1491 nominateddeltas.append(deltainfo)
1490 for candidaterev in candidaterevs:
1492 for candidaterev in candidaterevs:
1491 if self._debug_search:
1493 if self._debug_search:
1492 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1494 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1493 msg %= candidaterev
1495 msg %= candidaterev
1494 self._write_debug(msg)
1496 self._write_debug(msg)
1495 candidate_type = None
1497 candidate_type = None
1496 if candidaterev == p1r:
1498 if candidaterev == p1r:
1497 candidate_type = b"p1"
1499 candidate_type = b"p1"
1498 elif candidaterev == p2r:
1500 elif candidaterev == p2r:
1499 candidate_type = b"p2"
1501 candidate_type = b"p2"
1500 elif self.revlog.issnapshot(candidaterev):
1502 elif self.revlog.issnapshot(candidaterev):
1501 candidate_type = b"snapshot-%d"
1503 candidate_type = b"snapshot-%d"
1502 candidate_type %= self.revlog.snapshotdepth(
1504 candidate_type %= self.revlog.snapshotdepth(
1503 candidaterev
1505 candidaterev
1504 )
1506 )
1505
1507
1506 if candidate_type is not None:
1508 if candidate_type is not None:
1507 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1509 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1508 msg %= candidate_type
1510 msg %= candidate_type
1509 self._write_debug(msg)
1511 self._write_debug(msg)
1510 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1512 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1511 msg %= self.revlog.length(candidaterev)
1513 msg %= self.revlog.length(candidaterev)
1512 self._write_debug(msg)
1514 self._write_debug(msg)
1513 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1515 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1514 msg %= self.revlog.deltaparent(candidaterev)
1516 msg %= self.revlog.deltaparent(candidaterev)
1515 self._write_debug(msg)
1517 self._write_debug(msg)
1516
1518
1517 dbg_try_count += 1
1519 dbg_try_count += 1
1518
1520
1519 if self._debug_search:
1521 if self._debug_search:
1520 delta_start = util.timer()
1522 delta_start = util.timer()
1521 candidatedelta = self._builddeltainfo(
1523 candidatedelta = self._builddeltainfo(
1522 revinfo,
1524 revinfo,
1523 candidaterev,
1525 candidaterev,
1524 target_rev=target_rev,
1526 target_rev=target_rev,
1525 )
1527 )
1526 if self._debug_search:
1528 if self._debug_search:
1527 delta_end = util.timer()
1529 delta_end = util.timer()
1528 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1530 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1529 msg %= delta_end - delta_start
1531 msg %= delta_end - delta_start
1530 self._write_debug(msg)
1532 self._write_debug(msg)
1531 if candidatedelta is not None:
1533 if candidatedelta is not None:
1532 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1534 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1533 if self._debug_search:
1535 if self._debug_search:
1534 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1536 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1535 msg %= candidatedelta.deltalen
1537 msg %= candidatedelta.deltalen
1536 self._write_debug(msg)
1538 self._write_debug(msg)
1537 nominateddeltas.append(candidatedelta)
1539 nominateddeltas.append(candidatedelta)
1538 elif self._debug_search:
1540 elif self._debug_search:
1539 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1541 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1540 msg %= candidatedelta.deltalen
1542 msg %= candidatedelta.deltalen
1541 self._write_debug(msg)
1543 self._write_debug(msg)
1542 elif self._debug_search:
1544 elif self._debug_search:
1543 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1545 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1544 self._write_debug(msg)
1546 self._write_debug(msg)
1545 if nominateddeltas:
1547 if nominateddeltas:
1546 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1548 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1547 if deltainfo is not None:
1549 if deltainfo is not None:
1548 candidaterevs = groups.send(deltainfo.base)
1550 candidaterevs = groups.send(deltainfo.base)
1549 else:
1551 else:
1550 candidaterevs = next(groups)
1552 candidaterevs = next(groups)
1551
1553
1552 if deltainfo is None:
1554 if deltainfo is None:
1553 dbg_type = b"full"
1555 dbg_type = b"full"
1554 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1556 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1555 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1557 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1556 dbg_type = b"snapshot"
1558 dbg_type = b"snapshot"
1557 else:
1559 else:
1558 dbg_type = b"delta"
1560 dbg_type = b"delta"
1559
1561
1560 if gather_debug:
1562 if gather_debug:
1561 end = util.timer()
1563 end = util.timer()
1562 if dbg_type == b'full':
1564 if dbg_type == b'full':
1563 used_cached = (
1565 used_cached = (
1564 cachedelta is not None
1566 cachedelta is not None
1565 and dbg_try_rounds == 0
1567 and dbg_try_rounds == 0
1566 and dbg_try_count == 0
1568 and dbg_try_count == 0
1567 and cachedelta[0] == nullrev
1569 and cachedelta[0] == nullrev
1568 )
1570 )
1569 else:
1571 else:
1570 used_cached = (
1572 used_cached = (
1571 cachedelta is not None
1573 cachedelta is not None
1572 and dbg_try_rounds == 1
1574 and dbg_try_rounds == 1
1573 and dbg_try_count == 1
1575 and dbg_try_count == 1
1574 and deltainfo.base == cachedelta[0]
1576 and deltainfo.base == cachedelta[0]
1575 )
1577 )
1576 dbg['duration'] = end - start
1578 dbg['duration'] = end - start
1577 dbg[
1579 dbg[
1578 'delta-base'
1580 'delta-base'
1579 ] = deltainfo.base # pytype: disable=attribute-error
1581 ] = deltainfo.base # pytype: disable=attribute-error
1580 dbg['search_round_count'] = dbg_try_rounds
1582 dbg['search_round_count'] = dbg_try_rounds
1581 dbg['using-cached-base'] = used_cached
1583 dbg['using-cached-base'] = used_cached
1582 dbg['delta_try_count'] = dbg_try_count
1584 dbg['delta_try_count'] = dbg_try_count
1583 dbg['type'] = dbg_type
1585 dbg['type'] = dbg_type
1584 if (
1586 if (
1585 deltainfo.snapshotdepth # pytype: disable=attribute-error
1587 deltainfo.snapshotdepth # pytype: disable=attribute-error
1586 is not None
1588 is not None
1587 ):
1589 ):
1588 dbg[
1590 dbg[
1589 'snapshot-depth'
1591 'snapshot-depth'
1590 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1592 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1591 else:
1593 else:
1592 dbg['snapshot-depth'] = 0
1594 dbg['snapshot-depth'] = 0
1593 self._dbg_process_data(dbg)
1595 self._dbg_process_data(dbg)
1594 return deltainfo
1596 return deltainfo
1595
1597
1596 def _one_dbg_data(self):
1598 def _one_dbg_data(self):
1597 return {
1599 return {
1598 'duration': None,
1600 'duration': None,
1599 'revision': None,
1601 'revision': None,
1600 'delta-base': None,
1602 'delta-base': None,
1601 'search_round_count': None,
1603 'search_round_count': None,
1602 'using-cached-base': None,
1604 'using-cached-base': None,
1603 'delta_try_count': None,
1605 'delta_try_count': None,
1604 'type': None,
1606 'type': None,
1605 'p1-chain-len': None,
1607 'p1-chain-len': None,
1606 'p2-chain-len': None,
1608 'p2-chain-len': None,
1607 'snapshot-depth': None,
1609 'snapshot-depth': None,
1608 'target-revlog': None,
1610 'target-revlog': None,
1609 }
1611 }
1610
1612
1611 def _dbg_process_data(self, dbg):
1613 def _dbg_process_data(self, dbg):
1612 if self._debug_info is not None:
1614 if self._debug_info is not None:
1613 self._debug_info.append(dbg)
1615 self._debug_info.append(dbg)
1614
1616
1615 if self._write_debug is not None:
1617 if self._write_debug is not None:
1616 msg = (
1618 msg = (
1617 b"DBG-DELTAS:"
1619 b"DBG-DELTAS:"
1618 b" %-12s"
1620 b" %-12s"
1619 b" rev=%d:"
1621 b" rev=%d:"
1620 b" delta-base=%d"
1622 b" delta-base=%d"
1621 b" is-cached=%d"
1623 b" is-cached=%d"
1622 b" - search-rounds=%d"
1624 b" - search-rounds=%d"
1623 b" try-count=%d"
1625 b" try-count=%d"
1624 b" - delta-type=%-6s"
1626 b" - delta-type=%-6s"
1625 b" snap-depth=%d"
1627 b" snap-depth=%d"
1626 b" - p1-chain-length=%d"
1628 b" - p1-chain-length=%d"
1627 b" p2-chain-length=%d"
1629 b" p2-chain-length=%d"
1628 b" - duration=%f"
1630 b" - duration=%f"
1629 b"\n"
1631 b"\n"
1630 )
1632 )
1631 msg %= (
1633 msg %= (
1632 dbg["target-revlog"],
1634 dbg["target-revlog"],
1633 dbg["revision"],
1635 dbg["revision"],
1634 dbg["delta-base"],
1636 dbg["delta-base"],
1635 dbg["using-cached-base"],
1637 dbg["using-cached-base"],
1636 dbg["search_round_count"],
1638 dbg["search_round_count"],
1637 dbg["delta_try_count"],
1639 dbg["delta_try_count"],
1638 dbg["type"],
1640 dbg["type"],
1639 dbg["snapshot-depth"],
1641 dbg["snapshot-depth"],
1640 dbg["p1-chain-len"],
1642 dbg["p1-chain-len"],
1641 dbg["p2-chain-len"],
1643 dbg["p2-chain-len"],
1642 dbg["duration"],
1644 dbg["duration"],
1643 )
1645 )
1644 self._write_debug(msg)
1646 self._write_debug(msg)
1645
1647
1646
1648
1647 def delta_compression(default_compression_header, deltainfo):
1649 def delta_compression(default_compression_header, deltainfo):
1648 """return (COMPRESSION_MODE, deltainfo)
1650 """return (COMPRESSION_MODE, deltainfo)
1649
1651
1650 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1652 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1651 compression.
1653 compression.
1652 """
1654 """
1653 h, d = deltainfo.data
1655 h, d = deltainfo.data
1654 compression_mode = COMP_MODE_INLINE
1656 compression_mode = COMP_MODE_INLINE
1655 if not h and not d:
1657 if not h and not d:
1656 # not data to store at all... declare them uncompressed
1658 # not data to store at all... declare them uncompressed
1657 compression_mode = COMP_MODE_PLAIN
1659 compression_mode = COMP_MODE_PLAIN
1658 elif not h:
1660 elif not h:
1659 t = d[0:1]
1661 t = d[0:1]
1660 if t == b'\0':
1662 if t == b'\0':
1661 compression_mode = COMP_MODE_PLAIN
1663 compression_mode = COMP_MODE_PLAIN
1662 elif t == default_compression_header:
1664 elif t == default_compression_header:
1663 compression_mode = COMP_MODE_DEFAULT
1665 compression_mode = COMP_MODE_DEFAULT
1664 elif h == b'u':
1666 elif h == b'u':
1665 # we have a more efficient way to declare uncompressed
1667 # we have a more efficient way to declare uncompressed
1666 h = b''
1668 h = b''
1667 compression_mode = COMP_MODE_PLAIN
1669 compression_mode = COMP_MODE_PLAIN
1668 deltainfo = drop_u_compression(deltainfo)
1670 deltainfo = drop_u_compression(deltainfo)
1669 return compression_mode, deltainfo
1671 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now