upstream/mercurial-mirror Commit - r51948:fa7d307e

1

# revlogdeltas.py - Logic around delta computation for revlog

1

# revlogdeltas.py - Logic around delta computation for revlog

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

"""Helper class to compute deltas stored inside revlogs"""

8

"""Helper class to compute deltas stored inside revlogs"""

9

10

11

import collections

11

import collections

12

import struct

12

import struct

13

14

# import stuff from node for others to import from revlog

14

# import stuff from node for others to import from revlog

15

from ..node import nullrev

15

from ..node import nullrev

16

from ..i18n import _

16

from ..i18n import _

17

18

from .constants import (

18

from .constants import (

19

COMP_MODE_DEFAULT,

19

COMP_MODE_DEFAULT,

20

COMP_MODE_INLINE,

20

COMP_MODE_INLINE,

21

COMP_MODE_PLAIN,

21

COMP_MODE_PLAIN,

22

DELTA_BASE_REUSE_FORCE,

22

DELTA_BASE_REUSE_FORCE,

23

DELTA_BASE_REUSE_NO,

23

DELTA_BASE_REUSE_NO,

24

KIND_CHANGELOG,

24

KIND_CHANGELOG,

25

KIND_FILELOG,

25

KIND_FILELOG,

26

KIND_MANIFESTLOG,

26

KIND_MANIFESTLOG,

27

REVIDX_ISCENSORED,

27

REVIDX_ISCENSORED,

28

REVIDX_RAWTEXT_CHANGING_FLAGS,

28

REVIDX_RAWTEXT_CHANGING_FLAGS,

29

)

29

)

30

31

from ..thirdparty import attr

31

from ..thirdparty import attr

32

33

from .. import (

33

from .. import (

34

error,

34

error,

35

mdiff,

35

mdiff,

36

util,

36

util,

37

)

37

)

38

39

from . import flagutil

39

from . import flagutil

40

41

# maximum <delta-chain-data>/<revision-text-length> ratio

41

# maximum <delta-chain-data>/<revision-text-length> ratio

42

LIMIT_DELTA2TEXT = 2

42

LIMIT_DELTA2TEXT = 2

43

44

45

class _testrevlog:

45

class _testrevlog:

46

"""minimalist fake revlog to use in doctests"""

46

"""minimalist fake revlog to use in doctests"""

47

48

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

48

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

49

"""data is an list of revision payload boundaries"""

49

"""data is an list of revision payload boundaries"""

50

from .. import revlog

50

from .. import revlog

51

52

self._data = data

52

self._data = data

53

self._srdensitythreshold = density

53

self._srdensitythreshold = density

54

self._srmingapsize = mingap

54

self._srmingapsize = mingap

55

self.data_config = revlog.DataConfig()

55

self.data_config = revlog.DataConfig()

56

self.data_config.sr_density_threshold = density

56

self.data_config.sr_density_threshold = density

57

self.data_config.sr_min_gap_size = mingap

57

self.data_config.sr_min_gap_size = mingap

58

self.delta_config = revlog.DeltaConfig()

58

self.delta_config = revlog.DeltaConfig()

59

self.feature_config = revlog.FeatureConfig()

59

self.feature_config = revlog.FeatureConfig()

60

self._snapshot = set(snapshot)

60

self._snapshot = set(snapshot)

61

self.index = None

61

self.index = None

62

63

def start(self, rev):

63

def start(self, rev):

64

if rev == nullrev:

64

if rev == nullrev:

65

return 0

65

return 0

66

if rev == 0:

66

if rev == 0:

67

return 0

67

return 0

68

return self._data[rev - 1]

68

return self._data[rev - 1]

69

70

def end(self, rev):

70

def end(self, rev):

71

if rev == nullrev:

71

if rev == nullrev:

72

return 0

72

return 0

73

return self._data[rev]

73

return self._data[rev]

74

75

def length(self, rev):

75

def length(self, rev):

76

return self.end(rev) - self.start(rev)

76

return self.end(rev) - self.start(rev)

77

78

def __len__(self):

78

def __len__(self):

79

return len(self._data)

79

return len(self._data)

80

81

def issnapshot(self, rev):

81

def issnapshot(self, rev):

82

if rev == nullrev:

82

if rev == nullrev:

83

return True

83

return True

84

return rev in self._snapshot

84

return rev in self._snapshot

85

86

87

def slicechunk(revlog, revs, targetsize=None):

87

def slicechunk(revlog, revs, targetsize=None):

88

"""slice revs to reduce the amount of unrelated data to be read from disk.

88

"""slice revs to reduce the amount of unrelated data to be read from disk.

89

90

``revs`` is sliced into groups that should be read in one time.

90

``revs`` is sliced into groups that should be read in one time.

91

Assume that revs are sorted.

91

Assume that revs are sorted.

92

93

The initial chunk is sliced until the overall density (payload/chunks-span

93

The initial chunk is sliced until the overall density (payload/chunks-span

94

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

94

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

95

`revlog._srmingapsize` is skipped.

95

`revlog._srmingapsize` is skipped.

96

97

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

97

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

98

For consistency with other slicing choice, this limit won't go lower than

98

For consistency with other slicing choice, this limit won't go lower than

99

`revlog._srmingapsize`.

99

`revlog._srmingapsize`.

100

101

If individual revisions chunk are larger than this limit, they will still

101

If individual revisions chunk are larger than this limit, they will still

102

be raised individually.

102

be raised individually.

103

104

>>> data = [

104

>>> data = [

105

... 5, #00 (5)

105

... 5, #00 (5)

106

... 10, #01 (5)

106

... 10, #01 (5)

107

... 12, #02 (2)

107

... 12, #02 (2)

108

... 12, #03 (empty)

108

... 12, #03 (empty)

109

... 27, #04 (15)

109

... 27, #04 (15)

110

... 31, #05 (4)

110

... 31, #05 (4)

111

... 31, #06 (empty)

111

... 31, #06 (empty)

112

... 42, #07 (11)

112

... 42, #07 (11)

113

... 47, #08 (5)

113

... 47, #08 (5)

114

... 47, #09 (empty)

114

... 47, #09 (empty)

115

... 48, #10 (1)

115

... 48, #10 (1)

116

... 51, #11 (3)

116

... 51, #11 (3)

117

... 74, #12 (23)

117

... 74, #12 (23)

118

... 85, #13 (11)

118

... 85, #13 (11)

119

... 86, #14 (1)

119

... 86, #14 (1)

120

... 91, #15 (5)

120

... 91, #15 (5)

121

... ]

121

... ]

122

>>> revlog = _testrevlog(data, snapshot=range(16))

122

>>> revlog = _testrevlog(data, snapshot=range(16))

123

124

>>> list(slicechunk(revlog, list(range(16))))

124

>>> list(slicechunk(revlog, list(range(16))))

125

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

125

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

126

>>> list(slicechunk(revlog, [0, 15]))

126

>>> list(slicechunk(revlog, [0, 15]))

127

[[0], [15]]

127

[[0], [15]]

128

>>> list(slicechunk(revlog, [0, 11, 15]))

128

>>> list(slicechunk(revlog, [0, 11, 15]))

129

[[0], [11], [15]]

129

[[0], [11], [15]]

130

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

130

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

131

[[0], [11, 13, 15]]

131

[[0], [11, 13, 15]]

132

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

132

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

133

[[1, 2], [5, 8, 10, 11], [14]]

133

[[1, 2], [5, 8, 10, 11], [14]]

134

135

Slicing with a maximum chunk size

135

Slicing with a maximum chunk size

136

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

136

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

137

[[0], [11], [13], [15]]

137

[[0], [11], [13], [15]]

138

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

138

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

139

[[0], [11], [13, 15]]

139

[[0], [11], [13, 15]]

140

141

Slicing involving nullrev

141

Slicing involving nullrev

142

>>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))

142

>>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))

143

[[-1, 0], [11], [13, 15]]

143

[[-1, 0], [11], [13, 15]]

144

>>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))

144

>>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))

145

[[-1], [13], [15]]

145

[[-1], [13], [15]]

146

"""

146

"""

147

if targetsize is not None:

147

if targetsize is not None:

148

targetsize = max(targetsize, revlog._srmingapsize)

148

targetsize = max(targetsize, revlog._srmingapsize)

149

# targetsize should not be specified when evaluating delta candidates:

149

# targetsize should not be specified when evaluating delta candidates:

150

# * targetsize is used to ensure we stay within specification when reading,

150

# * targetsize is used to ensure we stay within specification when reading,

151

densityslicing = getattr(revlog.index, 'slicechunktodensity', None)

151

densityslicing = getattr(revlog.index, 'slicechunktodensity', None)

152

if densityslicing is None:

152

if densityslicing is None:

153

densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)

153

densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)

154

for chunk in densityslicing(

154

for chunk in densityslicing(

155

revs, revlog._srdensitythreshold, revlog._srmingapsize

155

revs, revlog._srdensitythreshold, revlog._srmingapsize

156

):

156

):

157

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

157

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

158

yield subchunk

158

yield subchunk

159

160

161

def _slicechunktosize(revlog, revs, targetsize=None):

161

def _slicechunktosize(revlog, revs, targetsize=None):

162

"""slice revs to match the target size

162

"""slice revs to match the target size

163

164

This is intended to be used on chunk that density slicing selected by that

164

This is intended to be used on chunk that density slicing selected by that

165

are still too large compared to the read garantee of revlog. This might

165

are still too large compared to the read garantee of revlog. This might

166

happens when "minimal gap size" interrupted the slicing or when chain are

166

happens when "minimal gap size" interrupted the slicing or when chain are

167

built in a way that create large blocks next to each other.

167

built in a way that create large blocks next to each other.

168

169

>>> data = [

169

>>> data = [

170

... 3, #0 (3)

170

... 3, #0 (3)

171

... 5, #1 (2)

171

... 5, #1 (2)

172

... 6, #2 (1)

172

... 6, #2 (1)

173

... 8, #3 (2)

173

... 8, #3 (2)

174

... 8, #4 (empty)

174

... 8, #4 (empty)

175

... 11, #5 (3)

175

... 11, #5 (3)

176

... 12, #6 (1)

176

... 12, #6 (1)

177

... 13, #7 (1)

177

... 13, #7 (1)

178

... 14, #8 (1)

178

... 14, #8 (1)

179

... ]

179

... ]

180

181

== All snapshots cases ==

181

== All snapshots cases ==

182

>>> revlog = _testrevlog(data, snapshot=range(9))

182

>>> revlog = _testrevlog(data, snapshot=range(9))

183

184

Cases where chunk is already small enough

184

Cases where chunk is already small enough

185

>>> list(_slicechunktosize(revlog, [0], 3))

185

>>> list(_slicechunktosize(revlog, [0], 3))

186

[[0]]

186

[[0]]

187

>>> list(_slicechunktosize(revlog, [6, 7], 3))

187

>>> list(_slicechunktosize(revlog, [6, 7], 3))

188

[[6, 7]]

188

[[6, 7]]

189

>>> list(_slicechunktosize(revlog, [0], None))

189

>>> list(_slicechunktosize(revlog, [0], None))

190

[[0]]

190

[[0]]

191

>>> list(_slicechunktosize(revlog, [6, 7], None))

191

>>> list(_slicechunktosize(revlog, [6, 7], None))

192

[[6, 7]]

192

[[6, 7]]

193

194

cases where we need actual slicing

194

cases where we need actual slicing

195

>>> list(_slicechunktosize(revlog, [0, 1], 3))

195

>>> list(_slicechunktosize(revlog, [0, 1], 3))

196

[[0], [1]]

196

[[0], [1]]

197

>>> list(_slicechunktosize(revlog, [1, 3], 3))

197

>>> list(_slicechunktosize(revlog, [1, 3], 3))

198

[[1], [3]]

198

[[1], [3]]

199

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

199

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

200

[[1, 2], [3]]

200

[[1, 2], [3]]

201

>>> list(_slicechunktosize(revlog, [3, 5], 3))

201

>>> list(_slicechunktosize(revlog, [3, 5], 3))

202

[[3], [5]]

202

[[3], [5]]

203

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

203

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

204

[[3], [5]]

204

[[3], [5]]

205

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

205

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

206

[[5], [6, 7, 8]]

206

[[5], [6, 7, 8]]

207

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

207

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

208

[[0], [1, 2], [3], [5], [6, 7, 8]]

208

[[0], [1, 2], [3], [5], [6, 7, 8]]

209

210

Case with too large individual chunk (must return valid chunk)

210

Case with too large individual chunk (must return valid chunk)

211

>>> list(_slicechunktosize(revlog, [0, 1], 2))

211

>>> list(_slicechunktosize(revlog, [0, 1], 2))

212

[[0], [1]]

212

[[0], [1]]

213

>>> list(_slicechunktosize(revlog, [1, 3], 1))

213

>>> list(_slicechunktosize(revlog, [1, 3], 1))

214

[[1], [3]]

214

[[1], [3]]

215

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

215

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

216

[[3], [5]]

216

[[3], [5]]

217

218

== No Snapshot cases ==

218

== No Snapshot cases ==

219

>>> revlog = _testrevlog(data)

219

>>> revlog = _testrevlog(data)

220

221

Cases where chunk is already small enough

221

Cases where chunk is already small enough

222

>>> list(_slicechunktosize(revlog, [0], 3))

222

>>> list(_slicechunktosize(revlog, [0], 3))

223

[[0]]

223

[[0]]

224

>>> list(_slicechunktosize(revlog, [6, 7], 3))

224

>>> list(_slicechunktosize(revlog, [6, 7], 3))

225

[[6, 7]]

225

[[6, 7]]

226

>>> list(_slicechunktosize(revlog, [0], None))

226

>>> list(_slicechunktosize(revlog, [0], None))

227

[[0]]

227

[[0]]

228

>>> list(_slicechunktosize(revlog, [6, 7], None))

228

>>> list(_slicechunktosize(revlog, [6, 7], None))

229

[[6, 7]]

229

[[6, 7]]

230

231

cases where we need actual slicing

231

cases where we need actual slicing

232

>>> list(_slicechunktosize(revlog, [0, 1], 3))

232

>>> list(_slicechunktosize(revlog, [0, 1], 3))

233

[[0], [1]]

233

[[0], [1]]

234

>>> list(_slicechunktosize(revlog, [1, 3], 3))

234

>>> list(_slicechunktosize(revlog, [1, 3], 3))

235

[[1], [3]]

235

[[1], [3]]

236

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

236

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

237

[[1], [2, 3]]

237

[[1], [2, 3]]

238

>>> list(_slicechunktosize(revlog, [3, 5], 3))

238

>>> list(_slicechunktosize(revlog, [3, 5], 3))

239

[[3], [5]]

239

[[3], [5]]

240

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

240

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

241

[[3], [4, 5]]

241

[[3], [4, 5]]

242

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

242

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

243

[[5], [6, 7, 8]]

243

[[5], [6, 7, 8]]

244

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

244

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

245

[[0], [1, 2], [3], [5], [6, 7, 8]]

245

[[0], [1, 2], [3], [5], [6, 7, 8]]

246

247

Case with too large individual chunk (must return valid chunk)

247

Case with too large individual chunk (must return valid chunk)

248

>>> list(_slicechunktosize(revlog, [0, 1], 2))

248

>>> list(_slicechunktosize(revlog, [0, 1], 2))

249

[[0], [1]]

249

[[0], [1]]

250

>>> list(_slicechunktosize(revlog, [1, 3], 1))

250

>>> list(_slicechunktosize(revlog, [1, 3], 1))

251

[[1], [3]]

251

[[1], [3]]

252

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

252

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

253

[[3], [5]]

253

[[3], [5]]

254

255

== mixed case ==

255

== mixed case ==

256

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

256

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

257

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

257

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

258

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

258

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

259

"""

259

"""

260

assert targetsize is None or 0 <= targetsize

260

assert targetsize is None or 0 <= targetsize

261

startdata = revlog.start(revs[0])

261

startdata = revlog.start(revs[0])

262

enddata = revlog.end(revs[-1])

262

enddata = revlog.end(revs[-1])

263

fullspan = enddata - startdata

263

fullspan = enddata - startdata

264

if targetsize is None or fullspan <= targetsize:

264

if targetsize is None or fullspan <= targetsize:

265

yield revs

265

yield revs

266

return

266

return

267

268

startrevidx = 0

268

startrevidx = 0

269

endrevidx = 1

269

endrevidx = 1

270

iterrevs = enumerate(revs)

270

iterrevs = enumerate(revs)

271

next(iterrevs) # skip first rev.

271

next(iterrevs) # skip first rev.

272

# first step: get snapshots out of the way

272

# first step: get snapshots out of the way

273

for idx, r in iterrevs:

273

for idx, r in iterrevs:

274

span = revlog.end(r) - startdata

274

span = revlog.end(r) - startdata

275

snapshot = revlog.issnapshot(r)

275

snapshot = revlog.issnapshot(r)

276

if span <= targetsize and snapshot:

276

if span <= targetsize and snapshot:

277

endrevidx = idx + 1

277

endrevidx = idx + 1

278

else:

278

else:

279

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

279

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

280

if chunk:

280

if chunk:

281

yield chunk

281

yield chunk

282

startrevidx = idx

282

startrevidx = idx

283

startdata = revlog.start(r)

283

startdata = revlog.start(r)

284

endrevidx = idx + 1

284

endrevidx = idx + 1

285

if not snapshot:

285

if not snapshot:

286

break

286

break

287

288

# for the others, we use binary slicing to quickly converge toward valid

288

# for the others, we use binary slicing to quickly converge toward valid

289

# chunks (otherwise, we might end up looking for start/end of many

289

# chunks (otherwise, we might end up looking for start/end of many

290

# revisions). This logic is not looking for the perfect slicing point, it

290

# revisions). This logic is not looking for the perfect slicing point, it

291

# focuses on quickly converging toward valid chunks.

291

# focuses on quickly converging toward valid chunks.

292

nbitem = len(revs)

292

nbitem = len(revs)

293

while (enddata - startdata) > targetsize:

293

while (enddata - startdata) > targetsize:

294

endrevidx = nbitem

294

endrevidx = nbitem

295

if nbitem - startrevidx <= 1:

295

if nbitem - startrevidx <= 1:

296

break # protect against individual chunk larger than limit

296

break # protect against individual chunk larger than limit

297

localenddata = revlog.end(revs[endrevidx - 1])

297

localenddata = revlog.end(revs[endrevidx - 1])

298

span = localenddata - startdata

298

span = localenddata - startdata

299

while span > targetsize:

299

while span > targetsize:

300

if endrevidx - startrevidx <= 1:

300

if endrevidx - startrevidx <= 1:

301

break # protect against individual chunk larger than limit

301

break # protect against individual chunk larger than limit

302

endrevidx -= (endrevidx - startrevidx) // 2

302

endrevidx -= (endrevidx - startrevidx) // 2

303

localenddata = revlog.end(revs[endrevidx - 1])

303

localenddata = revlog.end(revs[endrevidx - 1])

304

span = localenddata - startdata

304

span = localenddata - startdata

305

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

305

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

306

if chunk:

306

if chunk:

307

yield chunk

307

yield chunk

308

startrevidx = endrevidx

308

startrevidx = endrevidx

309

startdata = revlog.start(revs[startrevidx])

309

startdata = revlog.start(revs[startrevidx])

310

311

chunk = _trimchunk(revlog, revs, startrevidx)

311

chunk = _trimchunk(revlog, revs, startrevidx)

312

if chunk:

312

if chunk:

313

yield chunk

313

yield chunk

314

315

316

def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):

316

def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):

317

"""slice revs to reduce the amount of unrelated data to be read from disk.

317

"""slice revs to reduce the amount of unrelated data to be read from disk.

318

319

``revs`` is sliced into groups that should be read in one time.

319

``revs`` is sliced into groups that should be read in one time.

320

Assume that revs are sorted.

320

Assume that revs are sorted.

321

322

The initial chunk is sliced until the overall density (payload/chunks-span

322

The initial chunk is sliced until the overall density (payload/chunks-span

323

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

323

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

324

skipped.

324

skipped.

325

326

>>> revlog = _testrevlog([

326

>>> revlog = _testrevlog([

327

... 5, #00 (5)

327

... 5, #00 (5)

328

... 10, #01 (5)

328

... 10, #01 (5)

329

... 12, #02 (2)

329

... 12, #02 (2)

330

... 12, #03 (empty)

330

... 12, #03 (empty)

331

... 27, #04 (15)

331

... 27, #04 (15)

332

... 31, #05 (4)

332

... 31, #05 (4)

333

... 31, #06 (empty)

333

... 31, #06 (empty)

334

... 42, #07 (11)

334

... 42, #07 (11)

335

... 47, #08 (5)

335

... 47, #08 (5)

336

... 47, #09 (empty)

336

... 47, #09 (empty)

337

... 48, #10 (1)

337

... 48, #10 (1)

338

... 51, #11 (3)

338

... 51, #11 (3)

339

... 74, #12 (23)

339

... 74, #12 (23)

340

... 85, #13 (11)

340

... 85, #13 (11)

341

... 86, #14 (1)

341

... 86, #14 (1)

342

... 91, #15 (5)

342

... 91, #15 (5)

343

... ])

343

... ])

344

345

>>> list(_slicechunktodensity(revlog, list(range(16))))

345

>>> list(_slicechunktodensity(revlog, list(range(16))))

346

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

346

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

347

>>> list(_slicechunktodensity(revlog, [0, 15]))

347

>>> list(_slicechunktodensity(revlog, [0, 15]))

348

[[0], [15]]

348

[[0], [15]]

349

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

349

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

350

[[0], [11], [15]]

350

[[0], [11], [15]]

351

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

351

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

352

[[0], [11, 13, 15]]

352

[[0], [11, 13, 15]]

353

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

353

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

354

[[1, 2], [5, 8, 10, 11], [14]]

354

[[1, 2], [5, 8, 10, 11], [14]]

355

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

355

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

356

... mingapsize=20))

356

... mingapsize=20))

357

[[1, 2, 3, 5, 8, 10, 11], [14]]

357

[[1, 2, 3, 5, 8, 10, 11], [14]]

358

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

358

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

359

... targetdensity=0.95))

359

... targetdensity=0.95))

360

[[1, 2], [5], [8, 10, 11], [14]]

360

[[1, 2], [5], [8, 10, 11], [14]]

361

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

361

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

362

... targetdensity=0.95, mingapsize=12))

362

... targetdensity=0.95, mingapsize=12))

363

[[1, 2], [5, 8, 10, 11], [14]]

363

[[1, 2], [5, 8, 10, 11], [14]]

364

"""

364

"""

365

start = revlog.start

365

start = revlog.start

366

length = revlog.length

366

length = revlog.length

367

368

if len(revs) <= 1:

368

if len(revs) <= 1:

369

yield revs

369

yield revs

370

return

370

return

371

372

deltachainspan = segmentspan(revlog, revs)

372

deltachainspan = segmentspan(revlog, revs)

373

374

if deltachainspan < mingapsize:

374

if deltachainspan < mingapsize:

375

yield revs

375

yield revs

376

return

376

return

377

378

readdata = deltachainspan

378

readdata = deltachainspan

379

chainpayload = sum(length(r) for r in revs)

379

chainpayload = sum(length(r) for r in revs)

380

381

if deltachainspan:

381

if deltachainspan:

382

density = chainpayload / float(deltachainspan)

382

density = chainpayload / float(deltachainspan)

383

else:

383

else:

384

density = 1.0

384

density = 1.0

385

386

if density >= targetdensity:

386

if density >= targetdensity:

387

yield revs

387

yield revs

388

return

388

return

389

390

# Store the gaps in a heap to have them sorted by decreasing size

390

# Store the gaps in a heap to have them sorted by decreasing size

391

gaps = []

391

gaps = []

392

prevend = None

392

prevend = None

393

for i, rev in enumerate(revs):

393

for i, rev in enumerate(revs):

394

revstart = start(rev)

394

revstart = start(rev)

395

revlen = length(rev)

395

revlen = length(rev)

396

397

# Skip empty revisions to form larger holes

397

# Skip empty revisions to form larger holes

398

if revlen == 0:

398

if revlen == 0:

399

continue

399

continue

400

401

if prevend is not None:

401

if prevend is not None:

402

gapsize = revstart - prevend

402

gapsize = revstart - prevend

403

# only consider holes that are large enough

403

# only consider holes that are large enough

404

if gapsize > mingapsize:

404

if gapsize > mingapsize:

405

gaps.append((gapsize, i))

405

gaps.append((gapsize, i))

406

407

prevend = revstart + revlen

407

prevend = revstart + revlen

408

# sort the gaps to pop them from largest to small

408

# sort the gaps to pop them from largest to small

409

gaps.sort()

409

gaps.sort()

410

411

# Collect the indices of the largest holes until the density is acceptable

411

# Collect the indices of the largest holes until the density is acceptable

412

selected = []

412

selected = []

413

while gaps and density < targetdensity:

413

while gaps and density < targetdensity:

414

gapsize, gapidx = gaps.pop()

414

gapsize, gapidx = gaps.pop()

415

416

selected.append(gapidx)

416

selected.append(gapidx)

417

418

# the gap sizes are stored as negatives to be sorted decreasingly

418

# the gap sizes are stored as negatives to be sorted decreasingly

419

# by the heap

419

# by the heap

420

readdata -= gapsize

420

readdata -= gapsize

421

if readdata > 0:

421

if readdata > 0:

422

density = chainpayload / float(readdata)

422

density = chainpayload / float(readdata)

423

else:

423

else:

424

density = 1.0

424

density = 1.0

425

selected.sort()

425

selected.sort()

426

427

# Cut the revs at collected indices

427

# Cut the revs at collected indices

428

previdx = 0

428

previdx = 0

429

for idx in selected:

429

for idx in selected:

430

431

chunk = _trimchunk(revlog, revs, previdx, idx)

431

chunk = _trimchunk(revlog, revs, previdx, idx)

432

if chunk:

432

if chunk:

433

yield chunk

433

yield chunk

434

435

previdx = idx

435

previdx = idx

436

437

chunk = _trimchunk(revlog, revs, previdx)

437

chunk = _trimchunk(revlog, revs, previdx)

438

if chunk:

438

if chunk:

439

yield chunk

439

yield chunk

440

441

442

def _trimchunk(revlog, revs, startidx, endidx=None):

442

def _trimchunk(revlog, revs, startidx, endidx=None):

443

"""returns revs[startidx:endidx] without empty trailing revs

443

"""returns revs[startidx:endidx] without empty trailing revs

444

445

Doctest Setup

445

Doctest Setup

446

>>> revlog = _testrevlog([

446

>>> revlog = _testrevlog([

447

... 5, #0

447

... 5, #0

448

... 10, #1

448

... 10, #1

449

... 12, #2

449

... 12, #2

450

... 12, #3 (empty)

450

... 12, #3 (empty)

451

... 17, #4

451

... 17, #4

452

... 21, #5

452

... 21, #5

453

... 21, #6 (empty)

453

... 21, #6 (empty)

454

... ])

454

... ])

455

456

Contiguous cases:

456

Contiguous cases:

457

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

457

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

458

[0, 1, 2, 3, 4, 5]

458

[0, 1, 2, 3, 4, 5]

459

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

459

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

460

[0, 1, 2, 3, 4]

460

[0, 1, 2, 3, 4]

461

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

461

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

462

[0, 1, 2]

462

[0, 1, 2]

463

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

463

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

464

[2]

464

[2]

465

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

465

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

466

[3, 4, 5]

466

[3, 4, 5]

467

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

467

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

468

[3, 4]

468

[3, 4]

469

470

Discontiguous cases:

470

Discontiguous cases:

471

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

471

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

472

[1, 3, 5]

472

[1, 3, 5]

473

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

473

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

474

[1]

474

[1]

475

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

475

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

476

[3, 5]

476

[3, 5]

477

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

477

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

478

[3, 5]

478

[3, 5]

479

"""

479

"""

480

length = revlog.length

480

length = revlog.length

481

482

if endidx is None:

482

if endidx is None:

483

endidx = len(revs)

483

endidx = len(revs)

484

485

# If we have a non-emtpy delta candidate, there are nothing to trim

485

# If we have a non-emtpy delta candidate, there are nothing to trim

486

if revs[endidx - 1] < len(revlog):

486

if revs[endidx - 1] < len(revlog):

487

# Trim empty revs at the end, except the very first revision of a chain

487

# Trim empty revs at the end, except the very first revision of a chain

488

while (

488

while (

489

endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0

489

endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0

490

):

490

):

491

endidx -= 1

491

endidx -= 1

492

493

return revs[startidx:endidx]

493

return revs[startidx:endidx]

494

495

496

def segmentspan(revlog, revs):

496

def segmentspan(revlog, revs):

497

"""Get the byte span of a segment of revisions

497

"""Get the byte span of a segment of revisions

498

499

revs is a sorted array of revision numbers

499

revs is a sorted array of revision numbers

500

501

>>> revlog = _testrevlog([

501

>>> revlog = _testrevlog([

502

... 5, #0

502

... 5, #0

503

... 10, #1

503

... 10, #1

504

... 12, #2

504

... 12, #2

505

... 12, #3 (empty)

505

... 12, #3 (empty)

506

... 17, #4

506

... 17, #4

507

... ])

507

... ])

508

509

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

509

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

510

17

510

17

511

>>> segmentspan(revlog, [0, 4])

511

>>> segmentspan(revlog, [0, 4])

512

17

512

17

513

>>> segmentspan(revlog, [3, 4])

513

>>> segmentspan(revlog, [3, 4])

514

5

514

5

515

>>> segmentspan(revlog, [1, 2, 3,])

515

>>> segmentspan(revlog, [1, 2, 3,])

516

7

516

7

517

>>> segmentspan(revlog, [1, 3])

517

>>> segmentspan(revlog, [1, 3])

518

7

518

7

519

"""

519

"""

520

if not revs:

520

if not revs:

521

return 0

521

return 0

522

end = revlog.end(revs[-1])

522

end = revlog.end(revs[-1])

523

return end - revlog.start(revs[0])

523

return end - revlog.start(revs[0])

524

525

526

def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):

526

def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):

527

"""build full text from a (base, delta) pair and other metadata"""

527

"""build full text from a (base, delta) pair and other metadata"""

528

# special case deltas which replace entire base; no need to decode

528

# special case deltas which replace entire base; no need to decode

529

# base revision. this neatly avoids censored bases, which throw when

529

# base revision. this neatly avoids censored bases, which throw when

530

# they're decoded.

530

# they're decoded.

531

hlen = struct.calcsize(b">lll")

531

hlen = struct.calcsize(b">lll")

532

if delta[:hlen] == mdiff.replacediffheader(

532

if delta[:hlen] == mdiff.replacediffheader(

533

revlog.rawsize(baserev), len(delta) - hlen

533

revlog.rawsize(baserev), len(delta) - hlen

534

):

534

):

535

fulltext = delta[hlen:]

535

fulltext = delta[hlen:]

536

else:

536

else:

537

# deltabase is rawtext before changed by flag processors, which is

537

# deltabase is rawtext before changed by flag processors, which is

538

# equivalent to non-raw text

538

# equivalent to non-raw text

539

basetext = revlog.revision(baserev)

539

basetext = revlog.revision(baserev)

540

fulltext = mdiff.patch(basetext, delta)

540

fulltext = mdiff.patch(basetext, delta)

541

542

try:

542

try:

543

validatehash = flagutil.processflagsraw(revlog, fulltext, flags)

543

validatehash = flagutil.processflagsraw(revlog, fulltext, flags)

544

if validatehash:

544

if validatehash:

545

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

545

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

546

if flags & REVIDX_ISCENSORED:

546

if flags & REVIDX_ISCENSORED:

547

raise error.StorageError(

547

raise error.StorageError(

548

_(b'node %s is not censored') % expectednode

548

_(b'node %s is not censored') % expectednode

549

)

549

)

550

except error.CensoredNodeError:

550

except error.CensoredNodeError:

551

# must pass the censored index flag to add censored revisions

551

# must pass the censored index flag to add censored revisions

552

if not flags & REVIDX_ISCENSORED:

552

if not flags & REVIDX_ISCENSORED:

553

raise

553

raise

554

return fulltext

554

return fulltext

555

556

557

@attr.s(slots=True, frozen=True)

557

@attr.s(slots=True, frozen=True)

558

class _deltainfo:

558

class _deltainfo:

559

distance = attr.ib()

559

distance = attr.ib()

560

deltalen = attr.ib()

560

deltalen = attr.ib()

561

data = attr.ib()

561

data = attr.ib()

562

base = attr.ib()

562

base = attr.ib()

563

chainbase = attr.ib()

563

chainbase = attr.ib()

564

chainlen = attr.ib()

564

chainlen = attr.ib()

565

compresseddeltalen = attr.ib()

565

compresseddeltalen = attr.ib()

566

snapshotdepth = attr.ib()

566

snapshotdepth = attr.ib()

567

568

569

def drop_u_compression(delta):

569

def drop_u_compression(delta):

570

"""turn into a "u" (no-compression) into no-compression without header

570

"""turn into a "u" (no-compression) into no-compression without header

571

572

This is useful for revlog format that has better compression method.

572

This is useful for revlog format that has better compression method.

573

"""

573

"""

574

assert delta.data[0] == b'u', delta.data[0]

574

assert delta.data[0] == b'u', delta.data[0]

575

return _deltainfo(

575

return _deltainfo(

576

delta.distance,

576

delta.distance,

577

delta.deltalen - 1,

577

delta.deltalen - 1,

578

(b'', delta.data[1]),

578

(b'', delta.data[1]),

579

delta.base,

579

delta.base,

580

delta.chainbase,

580

delta.chainbase,

581

delta.chainlen,

581

delta.chainlen,

582

delta.compresseddeltalen,

582

delta.compresseddeltalen,

583

delta.snapshotdepth,

583

delta.snapshotdepth,

584

)

584

)

585

586

587

def is_good_delta_info(revlog, deltainfo, revinfo):

587

def is_good_delta_info(revlog, deltainfo, revinfo):

588

"""Returns True if the given delta is good. Good means that it is within

588

"""Returns True if the given delta is good. Good means that it is within

589

the disk span, disk size, and chain length bounds that we know to be

589

the disk span, disk size, and chain length bounds that we know to be

590

performant."""

590

performant."""

591

if deltainfo is None:

591

if deltainfo is None:

592

return False

592

return False

593

594

# the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so

594

# the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so

595

# we should never end up asking such question. Adding the assert as a

595

# we should never end up asking such question. Adding the assert as a

596

# safe-guard to detect anything that would be fishy in this regard.

596

# safe-guard to detect anything that would be fishy in this regard.

597

assert (

597

assert (

598

revinfo.cachedelta is None

598

revinfo.cachedelta is None

599

or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE

599

or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE

600

or not revlog.delta_config.general_delta

600

or not revlog.delta_config.general_delta

601

)

601

)

602

603

# - 'deltainfo.distance' is the distance from the base revision --

603

# - 'deltainfo.distance' is the distance from the base revision --

604

# bounding it limits the amount of I/O we need to do.

604

# bounding it limits the amount of I/O we need to do.

605

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

605

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

606

# deltas we need to apply -- bounding it limits the amount of CPU

606

# deltas we need to apply -- bounding it limits the amount of CPU

607

# we consume.

607

# we consume.

608

609

textlen = revinfo.textlen

609

textlen = revinfo.textlen

610

defaultmax = textlen * 4

610

defaultmax = textlen * 4

611

maxdist = revlog._maxdeltachainspan

611

maxdist = revlog._maxdeltachainspan

612

if not maxdist:

612

if not maxdist:

613

maxdist = deltainfo.distance # ensure the conditional pass

613

maxdist = deltainfo.distance # ensure the conditional pass

614

maxdist = max(maxdist, defaultmax)

614

maxdist = max(maxdist, defaultmax)

615

616

# Bad delta from read span:

616

# Bad delta from read span:

617

#

617

#

618

# If the span of data read is larger than the maximum allowed.

618

# If the span of data read is larger than the maximum allowed.

619

#

619

#

620

# In the sparse-revlog case, we rely on the associated "sparse reading"

620

# In the sparse-revlog case, we rely on the associated "sparse reading"

621

# to avoid issue related to the span of data. In theory, it would be

621

# to avoid issue related to the span of data. In theory, it would be

622

# possible to build pathological revlog where delta pattern would lead

622

# possible to build pathological revlog where delta pattern would lead

623

# to too many reads. However, they do not happen in practice at all. So

623

# to too many reads. However, they do not happen in practice at all. So

624

# we skip the span check entirely.

624

# we skip the span check entirely.

625

if not revlog._sparserevlog and maxdist < deltainfo.distance:

625

if not revlog._sparserevlog and maxdist < deltainfo.distance:

626

return False

626

return False

627

628

# Bad delta from new delta size:

628

# Bad delta from new delta size:

629

#

629

#

630

# If the delta size is larger than the target text, storing the

630

# If the delta size is larger than the target text, storing the

631

# delta will be inefficient.

631

# delta will be inefficient.

632

if textlen < deltainfo.deltalen:

632

if textlen < deltainfo.deltalen:

633

return False

633

return False

634

635

# Bad delta from cumulated payload size:

635

# Bad delta from cumulated payload size:

636

#

636

#

637

# If the sum of delta get larger than K * target text length.

637

# If the sum of delta get larger than K * target text length.

638

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

638

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

639

return False

639

return False

640

641

# Bad delta from chain length:

641

# Bad delta from chain length:

642

#

642

#

643

# If the number of delta in the chain gets too high.

643

# If the number of delta in the chain gets too high.

644

if (

644

if (

645

revlog.delta_config.max_chain_len

645

revlog.delta_config.max_chain_len

646

and revlog.delta_config.max_chain_len < deltainfo.chainlen

646

and revlog.delta_config.max_chain_len < deltainfo.chainlen

647

):

647

):

648

return False

648

return False

649

650

# bad delta from intermediate snapshot size limit

650

# bad delta from intermediate snapshot size limit

651

#

651

#

652

# If an intermediate snapshot size is higher than the limit. The

652

# If an intermediate snapshot size is higher than the limit. The

653

# limit exist to prevent endless chain of intermediate delta to be

653

# limit exist to prevent endless chain of intermediate delta to be

654

# created.

654

# created.

655

if (

655

if (

656

deltainfo.snapshotdepth is not None

656

deltainfo.snapshotdepth is not None

657

and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen

657

and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen

658

):

658

):

659

return False

659

return False

660

661

# bad delta if new intermediate snapshot is larger than the previous

661

# bad delta if new intermediate snapshot is larger than the previous

662

# snapshot

662

# snapshot

663

if (

663

if (

664

deltainfo.snapshotdepth

664

deltainfo.snapshotdepth

665

and revlog.length(deltainfo.base) < deltainfo.deltalen

665

and revlog.length(deltainfo.base) < deltainfo.deltalen

666

):

666

):

667

return False

667

return False

668

669

return True

669

return True

670

671

672

# If a revision's full text is that much bigger than a base candidate full

672

# If a revision's full text is that much bigger than a base candidate full

673

# text's, it is very unlikely that it will produce a valid delta. We no longer

673

# text's, it is very unlikely that it will produce a valid delta. We no longer

674

# consider these candidates.

674

# consider these candidates.

675

LIMIT_BASE2TEXT = 500

675

LIMIT_BASE2TEXT = 500

676

677

678

def _candidategroups(

678

def _candidategroups(

679

revlog,

679

revlog,

680

textlen,

680

textlen,

681

p1,

681

p1,

682

p2,

682

p2,

683

cachedelta,

683

cachedelta,

684

excluded_bases=None,

684

excluded_bases=None,

685

target_rev=None,

685

target_rev=None,

686

snapshot_cache=None,

686

snapshot_cache=None,

687

):

687

):

688

"""Provides group of revision to be tested as delta base

688

"""Provides group of revision to be tested as delta base

689

690

This top level function focus on emitting groups with unique and worthwhile

690

This top level function focus on emitting groups with unique and worthwhile

691

content. See _raw_candidate_groups for details about the group order.

691

content. See _raw_candidate_groups for details about the group order.

692

"""

692

"""

693

# should we try to build a delta?

693

# should we try to build a delta?

694

if not (len(revlog) and revlog._storedeltachains):

694

if not (len(revlog) and revlog._storedeltachains):

695

yield None

695

yield None

696

return

696

return

697

698

if target_rev is None:

698

if target_rev is None:

699

target_rev = len(revlog)

699

target_rev = len(revlog)

700

701

if not revlog.delta_config.general_delta:

701

if not revlog.delta_config.general_delta:

702

# before general delta, there is only one possible delta base

702

# before general delta, there is only one possible delta base

703

yield (target_rev - 1,)

703

yield (target_rev - 1,)

704

yield None

704

yield None

705

return

705

return

706

707

# the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so

707

# the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so

708

# we should never end up asking such question. Adding the assert as a

708

# we should never end up asking such question. Adding the assert as a

709

# safe-guard to detect anything that would be fishy in this regard.

709

# safe-guard to detect anything that would be fishy in this regard.

710

assert (

710

assert (

711

cachedelta is None

711

cachedelta is None

712

or cachedelta[2] != DELTA_BASE_REUSE_FORCE

712

or cachedelta[2] != DELTA_BASE_REUSE_FORCE

713

or not revlog.delta_config.general_delta

713

or not revlog.delta_config.general_delta

714

)

714

)

715

716

deltalength = revlog.length

716

deltalength = revlog.length

717

deltaparent = revlog.deltaparent

717

deltaparent = revlog.deltaparent

718

sparse = revlog._sparserevlog

718

sparse = revlog._sparserevlog

719

good = None

719

good = None

720

721

deltas_limit = textlen * LIMIT_DELTA2TEXT

721

deltas_limit = textlen * LIMIT_DELTA2TEXT

722

group_chunk_size = revlog._candidate_group_chunk_size

722

group_chunk_size = revlog.delta_config.candidate_group_chunk_size

723

724

tested = {nullrev}

724

tested = {nullrev}

725

candidates = _refinedgroups(

725

candidates = _refinedgroups(

726

revlog,

726

revlog,

727

p1,

727

p1,

728

p2,

728

p2,

729

cachedelta,

729

cachedelta,

730

snapshot_cache=snapshot_cache,

730

snapshot_cache=snapshot_cache,

731

)

731

)

732

while True:

732

while True:

733

temptative = candidates.send(good)

733

temptative = candidates.send(good)

734

if temptative is None:

734

if temptative is None:

735

break

735

break

736

group = []

736

group = []

737

for rev in temptative:

737

for rev in temptative:

738

# skip over empty delta (no need to include them in a chain)

738

# skip over empty delta (no need to include them in a chain)

739

while not (rev == nullrev or rev in tested or deltalength(rev)):

739

while not (rev == nullrev or rev in tested or deltalength(rev)):

740

tested.add(rev)

740

tested.add(rev)

741

rev = deltaparent(rev)

741

rev = deltaparent(rev)

742

# no need to try a delta against nullrev, this will be done as a

742

# no need to try a delta against nullrev, this will be done as a

743

# last resort.

743

# last resort.

744

if rev == nullrev:

744

if rev == nullrev:

745

continue

745

continue

746

# filter out revision we tested already

746

# filter out revision we tested already

747

if rev in tested:

747

if rev in tested:

748

continue

748

continue

749

750

# an higher authority deamed the base unworthy (e.g. censored)

750

# an higher authority deamed the base unworthy (e.g. censored)

751

if excluded_bases is not None and rev in excluded_bases:

751

if excluded_bases is not None and rev in excluded_bases:

752

tested.add(rev)

752

tested.add(rev)

753

continue

753

continue

754

# We are in some recomputation cases and that rev is too high in

754

# We are in some recomputation cases and that rev is too high in

755

# the revlog

755

# the revlog

756

if target_rev is not None and rev >= target_rev:

756

if target_rev is not None and rev >= target_rev:

757

tested.add(rev)

757

tested.add(rev)

758

continue

758

continue

759

# filter out delta base that will never produce good delta

759

# filter out delta base that will never produce good delta

760

if deltas_limit < revlog.length(rev):

760

if deltas_limit < revlog.length(rev):

761

tested.add(rev)

761

tested.add(rev)

762

continue

762

continue

763

if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):

763

if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):

764

tested.add(rev)

764

tested.add(rev)

765

continue

765

continue

766

# no delta for rawtext-changing revs (see "candelta" for why)

766

# no delta for rawtext-changing revs (see "candelta" for why)

767

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

767

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

768

tested.add(rev)

768

tested.add(rev)

769

continue

769

continue

770

771

# If we reach here, we are about to build and test a delta.

771

# If we reach here, we are about to build and test a delta.

772

# The delta building process will compute the chaininfo in all

772

# The delta building process will compute the chaininfo in all

773

# case, since that computation is cached, it is fine to access it

773

# case, since that computation is cached, it is fine to access it

774

# here too.

774

# here too.

775

chainlen, chainsize = revlog._chaininfo(rev)

775

chainlen, chainsize = revlog._chaininfo(rev)

776

# if chain will be too long, skip base

776

# if chain will be too long, skip base

777

if (

777

if (

778

revlog.delta_config.max_chain_len

778

revlog.delta_config.max_chain_len

779

and chainlen >= revlog.delta_config.max_chain_len

779

and chainlen >= revlog.delta_config.max_chain_len

780

):

780

):

781

tested.add(rev)

781

tested.add(rev)

782

continue

782

continue

783

# if chain already have too much data, skip base

783

# if chain already have too much data, skip base

784

if deltas_limit < chainsize:

784

if deltas_limit < chainsize:

785

tested.add(rev)

785

tested.add(rev)

786

continue

786

continue

787

if sparse and revlog.upperboundcomp is not None:

787

if sparse and revlog.upperboundcomp is not None:

788

maxcomp = revlog.upperboundcomp

788

maxcomp = revlog.upperboundcomp

789

basenotsnap = (p1, p2, nullrev)

789

basenotsnap = (p1, p2, nullrev)

790

if rev not in basenotsnap and revlog.issnapshot(rev):

790

if rev not in basenotsnap and revlog.issnapshot(rev):

791

snapshotdepth = revlog.snapshotdepth(rev)

791

snapshotdepth = revlog.snapshotdepth(rev)

792

# If text is significantly larger than the base, we can

792

# If text is significantly larger than the base, we can

793

# expect the resulting delta to be proportional to the size

793

# expect the resulting delta to be proportional to the size

794

# difference

794

# difference

795

revsize = revlog.rawsize(rev)

795

revsize = revlog.rawsize(rev)

796

rawsizedistance = max(textlen - revsize, 0)

796

rawsizedistance = max(textlen - revsize, 0)

797

# use an estimate of the compression upper bound.

797

# use an estimate of the compression upper bound.

798

lowestrealisticdeltalen = rawsizedistance // maxcomp

798

lowestrealisticdeltalen = rawsizedistance // maxcomp

799

800

# check the absolute constraint on the delta size

800

# check the absolute constraint on the delta size

801

snapshotlimit = textlen >> snapshotdepth

801

snapshotlimit = textlen >> snapshotdepth

802

if snapshotlimit < lowestrealisticdeltalen:

802

if snapshotlimit < lowestrealisticdeltalen:

803

# delta lower bound is larger than accepted upper bound

803

# delta lower bound is larger than accepted upper bound

804

tested.add(rev)

804

tested.add(rev)

805

continue

805

continue

806

807

# check the relative constraint on the delta size

807

# check the relative constraint on the delta size

808

revlength = revlog.length(rev)

808

revlength = revlog.length(rev)

809

if revlength < lowestrealisticdeltalen:

809

if revlength < lowestrealisticdeltalen:

810

# delta probable lower bound is larger than target base

810

# delta probable lower bound is larger than target base

811

tested.add(rev)

811

tested.add(rev)

812

continue

812

continue

813

814

group.append(rev)

814

group.append(rev)

815

if group:

815

if group:

816

# When the size of the candidate group is big, it can result in a

816

# When the size of the candidate group is big, it can result in a

817

# quite significant performance impact. To reduce this, we can send

817

# quite significant performance impact. To reduce this, we can send

818

# them in smaller batches until the new batch does not provide any

818

# them in smaller batches until the new batch does not provide any

819

# improvements.

819

# improvements.

820

#

820

#

821

# This might reduce the overall efficiency of the compression in

821

# This might reduce the overall efficiency of the compression in

822

# some corner cases, but that should also prevent very pathological

822

# some corner cases, but that should also prevent very pathological

823

# cases from being an issue. (eg. 20 000 candidates).

823

# cases from being an issue. (eg. 20 000 candidates).

824

#

824

#

825

# XXX note that the ordering of the group becomes important as it

825

# XXX note that the ordering of the group becomes important as it

826

# now impacts the final result. The current order is unprocessed

826

# now impacts the final result. The current order is unprocessed

827

# and can be improved.

827

# and can be improved.

828

if group_chunk_size == 0:

828

if group_chunk_size == 0:

829

tested.update(group)

829

tested.update(group)

830

good = yield tuple(group)

830

good = yield tuple(group)

831

else:

831

else:

832

prev_good = good

832

prev_good = good

833

for start in range(0, len(group), group_chunk_size):

833

for start in range(0, len(group), group_chunk_size):

834

sub_group = group[start : start + group_chunk_size]

834

sub_group = group[start : start + group_chunk_size]

835

tested.update(sub_group)

835

tested.update(sub_group)

836

good = yield tuple(sub_group)

836

good = yield tuple(sub_group)

837

if prev_good == good:

837

if prev_good == good:

838

break

838

break

839

840

yield None

840

yield None

841

842

843

def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):

843

def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):

844

good = None

844

good = None

845

# First we try to reuse a the delta contained in the bundle.

845

# First we try to reuse a the delta contained in the bundle.

846

# (or from the source revlog)

846

# (or from the source revlog)

847

#

847

#

848

# This logic only applies to general delta repositories and can be disabled

848

# This logic only applies to general delta repositories and can be disabled

849

# through configuration. Disabling reuse source delta is useful when

849

# through configuration. Disabling reuse source delta is useful when

850

# we want to make sure we recomputed "optimal" deltas.

850

# we want to make sure we recomputed "optimal" deltas.

851

debug_info = None

851

debug_info = None

852

if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:

852

if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:

853

# Assume what we received from the server is a good choice

853

# Assume what we received from the server is a good choice

854

# build delta will reuse the cache

854

# build delta will reuse the cache

855

if debug_info is not None:

855

if debug_info is not None:

856

debug_info['cached-delta.tested'] += 1

856

debug_info['cached-delta.tested'] += 1

857

good = yield (cachedelta[0],)

857

good = yield (cachedelta[0],)

858

if good is not None:

858

if good is not None:

859

if debug_info is not None:

859

if debug_info is not None:

860

debug_info['cached-delta.accepted'] += 1

860

debug_info['cached-delta.accepted'] += 1

861

yield None

861

yield None

862

return

862

return

863

if snapshot_cache is None:

863

if snapshot_cache is None:

864

snapshot_cache = SnapshotCache()

864

snapshot_cache = SnapshotCache()

865

groups = _rawgroups(

865

groups = _rawgroups(

866

revlog,

866

revlog,

867

p1,

867

p1,

868

p2,

868

p2,

869

cachedelta,

869

cachedelta,

870

snapshot_cache,

870

snapshot_cache,

871

)

871

)

872

for candidates in groups:

872

for candidates in groups:

873

good = yield candidates

873

good = yield candidates

874

if good is not None:

874

if good is not None:

875

break

875

break

876

877

# If sparse revlog is enabled, we can try to refine the available deltas

877

# If sparse revlog is enabled, we can try to refine the available deltas

878

if not revlog._sparserevlog:

878

if not revlog._sparserevlog:

879

yield None

879

yield None

880

return

880

return

881

882

# if we have a refinable value, try to refine it

882

# if we have a refinable value, try to refine it

883

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

883

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

884

# refine snapshot down

884

# refine snapshot down

885

previous = None

885

previous = None

886

while previous != good:

886

while previous != good:

887

previous = good

887

previous = good

888

base = revlog.deltaparent(good)

888

base = revlog.deltaparent(good)

889

if base == nullrev:

889

if base == nullrev:

890

break

890

break

891

good = yield (base,)

891

good = yield (base,)

892

# refine snapshot up

892

# refine snapshot up

893

if not snapshot_cache.snapshots:

893

if not snapshot_cache.snapshots:

894

snapshot_cache.update(revlog, good + 1)

894

snapshot_cache.update(revlog, good + 1)

895

previous = None

895

previous = None

896

while good != previous:

896

while good != previous:

897

previous = good

897

previous = good

898

children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))

898

children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))

899

good = yield children

899

good = yield children

900

901

if debug_info is not None:

901

if debug_info is not None:

902

if good is None:

902

if good is None:

903

debug_info['no-solution'] += 1

903

debug_info['no-solution'] += 1

904

905

yield None

905

yield None

906

907

908

def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):

908

def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):

909

"""Provides group of revision to be tested as delta base

909

"""Provides group of revision to be tested as delta base

910

911

This lower level function focus on emitting delta theorically interresting

911

This lower level function focus on emitting delta theorically interresting

912

without looking it any practical details.

912

without looking it any practical details.

913

914

The group order aims at providing fast or small candidates first.

914

The group order aims at providing fast or small candidates first.

915

"""

915

"""

916

# Why search for delta base if we cannot use a delta base ?

916

# Why search for delta base if we cannot use a delta base ?

917

assert revlog.delta_config.general_delta

917

assert revlog.delta_config.general_delta

918

# also see issue6056

918

# also see issue6056

919

sparse = revlog._sparserevlog

919

sparse = revlog._sparserevlog

920

curr = len(revlog)

920

curr = len(revlog)

921

prev = curr - 1

921

prev = curr - 1

922

deltachain = lambda rev: revlog._deltachain(rev)[0]

922

deltachain = lambda rev: revlog._deltachain(rev)[0]

923

924

# exclude already lazy tested base if any

924

# exclude already lazy tested base if any

925

parents = [p for p in (p1, p2) if p != nullrev]

925

parents = [p for p in (p1, p2) if p != nullrev]

926

927

if not revlog.delta_config.delta_both_parents and len(parents) == 2:

927

if not revlog.delta_config.delta_both_parents and len(parents) == 2:

928

parents.sort()

928

parents.sort()

929

# To minimize the chance of having to build a fulltext,

929

# To minimize the chance of having to build a fulltext,

930

# pick first whichever parent is closest to us (max rev)

930

# pick first whichever parent is closest to us (max rev)

931

yield (parents[1],)

931

yield (parents[1],)

932

# then the other one (min rev) if the first did not fit

932

# then the other one (min rev) if the first did not fit

933

yield (parents[0],)

933

yield (parents[0],)

934

elif len(parents) > 0:

934

elif len(parents) > 0:

935

# Test all parents (1 or 2), and keep the best candidate

935

# Test all parents (1 or 2), and keep the best candidate

936

yield parents

936

yield parents

937

938

if sparse and parents:

938

if sparse and parents:

939

if snapshot_cache is None:

939

if snapshot_cache is None:

940

# map: base-rev: [snapshot-revs]

940

# map: base-rev: [snapshot-revs]

941

snapshot_cache = SnapshotCache()

941

snapshot_cache = SnapshotCache()

942

# See if we can use an existing snapshot in the parent chains to use as

942

# See if we can use an existing snapshot in the parent chains to use as

943

# a base for a new intermediate-snapshot

943

# a base for a new intermediate-snapshot

944

#

944

#

945

# search for snapshot in parents delta chain

945

# search for snapshot in parents delta chain

946

# map: snapshot-level: snapshot-rev

946

# map: snapshot-level: snapshot-rev

947

parents_snaps = collections.defaultdict(set)

947

parents_snaps = collections.defaultdict(set)

948

candidate_chains = [deltachain(p) for p in parents]

948

candidate_chains = [deltachain(p) for p in parents]

949

for chain in candidate_chains:

949

for chain in candidate_chains:

950

for idx, s in enumerate(chain):

950

for idx, s in enumerate(chain):

951

if not revlog.issnapshot(s):

951

if not revlog.issnapshot(s):

952

break

952

break

953

parents_snaps[idx].add(s)

953

parents_snaps[idx].add(s)

954

snapfloor = min(parents_snaps[0]) + 1

954

snapfloor = min(parents_snaps[0]) + 1

955

snapshot_cache.update(revlog, snapfloor)

955

snapshot_cache.update(revlog, snapfloor)

956

# search for the highest "unrelated" revision

956

# search for the highest "unrelated" revision

957

#

957

#

958

# Adding snapshots used by "unrelated" revision increase the odd we

958

# Adding snapshots used by "unrelated" revision increase the odd we

959

# reuse an independant, yet better snapshot chain.

959

# reuse an independant, yet better snapshot chain.

960

#

960

#

961

# XXX instead of building a set of revisions, we could lazily enumerate

961

# XXX instead of building a set of revisions, we could lazily enumerate

962

# over the chains. That would be more efficient, however we stick to

962

# over the chains. That would be more efficient, however we stick to

963

# simple code for now.

963

# simple code for now.

964

all_revs = set()

964

all_revs = set()

965

for chain in candidate_chains:

965

for chain in candidate_chains:

966

all_revs.update(chain)

966

all_revs.update(chain)

967

other = None

967

other = None

968

for r in revlog.revs(prev, snapfloor):

968

for r in revlog.revs(prev, snapfloor):

969

if r not in all_revs:

969

if r not in all_revs:

970

other = r

970

other = r

971

break

971

break

972

if other is not None:

972

if other is not None:

973

# To avoid unfair competition, we won't use unrelated intermediate

973

# To avoid unfair competition, we won't use unrelated intermediate

974

# snapshot that are deeper than the ones from the parent delta

974

# snapshot that are deeper than the ones from the parent delta

975

# chain.

975

# chain.

976

max_depth = max(parents_snaps.keys())

976

max_depth = max(parents_snaps.keys())

977

chain = deltachain(other)

977

chain = deltachain(other)

978

for depth, s in enumerate(chain):

978

for depth, s in enumerate(chain):

979

if s < snapfloor:

979

if s < snapfloor:

980

continue

980

continue

981

if max_depth < depth:

981

if max_depth < depth:

982

break

982

break

983

if not revlog.issnapshot(s):

983

if not revlog.issnapshot(s):

984

break

984

break

985

parents_snaps[depth].add(s)

985

parents_snaps[depth].add(s)

986

# Test them as possible intermediate snapshot base

986

# Test them as possible intermediate snapshot base

987

# We test them from highest to lowest level. High level one are more

987

# We test them from highest to lowest level. High level one are more

988

# likely to result in small delta

988

# likely to result in small delta

989

floor = None

989

floor = None

990

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

990

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

991

siblings = set()

991

siblings = set()

992

for s in snaps:

992

for s in snaps:

993

siblings.update(snapshot_cache.snapshots[s])

993

siblings.update(snapshot_cache.snapshots[s])

994

# Before considering making a new intermediate snapshot, we check

994

# Before considering making a new intermediate snapshot, we check

995

# if an existing snapshot, children of base we consider, would be

995

# if an existing snapshot, children of base we consider, would be

996

# suitable.

996

# suitable.

997

#

997

#

998

# It give a change to reuse a delta chain "unrelated" to the

998

# It give a change to reuse a delta chain "unrelated" to the

999

# current revision instead of starting our own. Without such

999

# current revision instead of starting our own. Without such

1000

# re-use, topological branches would keep reopening new chains.

1000

# re-use, topological branches would keep reopening new chains.

1001

# Creating more and more snapshot as the repository grow.

1001

# Creating more and more snapshot as the repository grow.

1002

1003

if floor is not None:

1003

if floor is not None:

1004

# We only do this for siblings created after the one in our

1004

# We only do this for siblings created after the one in our

1005

# parent's delta chain. Those created before has less chances

1005

# parent's delta chain. Those created before has less chances

1006

# to be valid base since our ancestors had to create a new

1006

# to be valid base since our ancestors had to create a new

1007

# snapshot.

1007

# snapshot.

1008

siblings = [r for r in siblings if floor < r]

1008

siblings = [r for r in siblings if floor < r]

1009

yield tuple(sorted(siblings))

1009

yield tuple(sorted(siblings))

1010

# then test the base from our parent's delta chain.

1010

# then test the base from our parent's delta chain.

1011

yield tuple(sorted(snaps))

1011

yield tuple(sorted(snaps))

1012

floor = min(snaps)

1012

floor = min(snaps)

1013

# No suitable base found in the parent chain, search if any full

1013

# No suitable base found in the parent chain, search if any full

1014

# snapshots emitted since parent's base would be a suitable base for an

1014

# snapshots emitted since parent's base would be a suitable base for an

1015

# intermediate snapshot.

1015

# intermediate snapshot.

1016

#

1016

#

1017

# It give a chance to reuse a delta chain unrelated to the current

1017

# It give a chance to reuse a delta chain unrelated to the current

1018

# revisions instead of starting our own. Without such re-use,

1018

# revisions instead of starting our own. Without such re-use,

1019

# topological branches would keep reopening new full chains. Creating

1019

# topological branches would keep reopening new full chains. Creating

1020

# more and more snapshot as the repository grow.

1020

# more and more snapshot as the repository grow.

1021

full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]

1021

full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]

1022

yield tuple(sorted(full))

1022

yield tuple(sorted(full))

1023

1024

if not sparse:

1024

if not sparse:

1025

# other approach failed try against prev to hopefully save us a

1025

# other approach failed try against prev to hopefully save us a

1026

# fulltext.

1026

# fulltext.

1027

yield (prev,)

1027

yield (prev,)

1028

1029

1030

class SnapshotCache:

1030

class SnapshotCache:

1031

__slots__ = ('snapshots', '_start_rev', '_end_rev')

1031

__slots__ = ('snapshots', '_start_rev', '_end_rev')

1032

1033

def __init__(self):

1033

def __init__(self):

1034

self.snapshots = collections.defaultdict(set)

1034

self.snapshots = collections.defaultdict(set)

1035

self._start_rev = None

1035

self._start_rev = None

1036

self._end_rev = None

1036

self._end_rev = None

1037

1038

def update(self, revlog, start_rev=0):

1038

def update(self, revlog, start_rev=0):

1039

"""find snapshots from start_rev to tip"""

1039

"""find snapshots from start_rev to tip"""

1040

nb_revs = len(revlog)

1040

nb_revs = len(revlog)

1041

end_rev = nb_revs - 1

1041

end_rev = nb_revs - 1

1042

if start_rev > end_rev:

1042

if start_rev > end_rev:

1043

return # range is empty

1043

return # range is empty

1044

1045

if self._start_rev is None:

1045

if self._start_rev is None:

1046

assert self._end_rev is None

1046

assert self._end_rev is None

1047

self._update(revlog, start_rev, end_rev)

1047

self._update(revlog, start_rev, end_rev)

1048

elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):

1048

elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):

1049

if start_rev < self._start_rev:

1049

if start_rev < self._start_rev:

1050

self._update(revlog, start_rev, self._start_rev - 1)

1050

self._update(revlog, start_rev, self._start_rev - 1)

1051

if self._end_rev < end_rev:

1051

if self._end_rev < end_rev:

1052

self._update(revlog, self._end_rev + 1, end_rev)

1052

self._update(revlog, self._end_rev + 1, end_rev)

1053

1054

if self._start_rev is None:

1054

if self._start_rev is None:

1055

assert self._end_rev is None

1055

assert self._end_rev is None

1056

self._end_rev = end_rev

1056

self._end_rev = end_rev

1057

self._start_rev = start_rev

1057

self._start_rev = start_rev

1058

else:

1058

else:

1059

self._start_rev = min(self._start_rev, start_rev)

1059

self._start_rev = min(self._start_rev, start_rev)

1060

self._end_rev = max(self._end_rev, end_rev)

1060

self._end_rev = max(self._end_rev, end_rev)

1061

assert self._start_rev <= self._end_rev, (

1061

assert self._start_rev <= self._end_rev, (

1062

self._start_rev,

1062

self._start_rev,

1063

self._end_rev,

1063

self._end_rev,

1064

)

1064

)

1065

1066

def _update(self, revlog, start_rev, end_rev):

1066

def _update(self, revlog, start_rev, end_rev):

1067

"""internal method that actually do update content"""

1067

"""internal method that actually do update content"""

1068

assert self._start_rev is None or (

1068

assert self._start_rev is None or (

1069

start_rev < self._start_rev or start_rev > self._end_rev

1069

start_rev < self._start_rev or start_rev > self._end_rev

1070

), (self._start_rev, self._end_rev, start_rev, end_rev)

1070

), (self._start_rev, self._end_rev, start_rev, end_rev)

1071

assert self._start_rev is None or (

1071

assert self._start_rev is None or (

1072

end_rev < self._start_rev or end_rev > self._end_rev

1072

end_rev < self._start_rev or end_rev > self._end_rev

1073

), (self._start_rev, self._end_rev, start_rev, end_rev)

1073

), (self._start_rev, self._end_rev, start_rev, end_rev)

1074

cache = self.snapshots

1074

cache = self.snapshots

1075

if hasattr(revlog.index, 'findsnapshots'):

1075

if hasattr(revlog.index, 'findsnapshots'):

1076

revlog.index.findsnapshots(cache, start_rev, end_rev)

1076

revlog.index.findsnapshots(cache, start_rev, end_rev)

1077

else:

1077

else:

1078

deltaparent = revlog.deltaparent

1078

deltaparent = revlog.deltaparent

1079

issnapshot = revlog.issnapshot

1079

issnapshot = revlog.issnapshot

1080

for rev in revlog.revs(start_rev, end_rev):

1080

for rev in revlog.revs(start_rev, end_rev):

1081

if issnapshot(rev):

1081

if issnapshot(rev):

1082

cache[deltaparent(rev)].add(rev)

1082

cache[deltaparent(rev)].add(rev)

1083

1084

1085

class deltacomputer:

1085

class deltacomputer:

1086

def __init__(

1086

def __init__(

1087

self,

1087

self,

1088

revlog,

1088

revlog,

1089

write_debug=None,

1089

write_debug=None,

1090

debug_search=False,

1090

debug_search=False,

1091

debug_info=None,

1091

debug_info=None,

1092

):

1092

):

1093

self.revlog = revlog

1093

self.revlog = revlog

1094

self._write_debug = write_debug

1094

self._write_debug = write_debug

1095

if write_debug is None:

1095

if write_debug is None:

1096

self._debug_search = False

1096

self._debug_search = False

1097

else:

1097

else:

1098

self._debug_search = debug_search

1098

self._debug_search = debug_search

1099

self._debug_info = debug_info

1099

self._debug_info = debug_info

1100

self._snapshot_cache = SnapshotCache()

1100

self._snapshot_cache = SnapshotCache()

1101

1102

@property

1102

@property

1103

def _gather_debug(self):

1103

def _gather_debug(self):

1104

return self._write_debug is not None or self._debug_info is not None

1104

return self._write_debug is not None or self._debug_info is not None

1105

1106

def buildtext(self, revinfo):

1106

def buildtext(self, revinfo):

1107

"""Builds a fulltext version of a revision

1107

"""Builds a fulltext version of a revision

1108

1109

revinfo: revisioninfo instance that contains all needed info

1109

revinfo: revisioninfo instance that contains all needed info

1110

"""

1110

"""

1111

btext = revinfo.btext

1111

btext = revinfo.btext

1112

if btext[0] is not None:

1112

if btext[0] is not None:

1113

return btext[0]

1113

return btext[0]

1114

1115

revlog = self.revlog

1115

revlog = self.revlog

1116

cachedelta = revinfo.cachedelta

1116

cachedelta = revinfo.cachedelta

1117

baserev = cachedelta[0]

1117

baserev = cachedelta[0]

1118

delta = cachedelta[1]

1118

delta = cachedelta[1]

1119

1120

fulltext = btext[0] = _textfromdelta(

1120

fulltext = btext[0] = _textfromdelta(

1121

revlog,

1121

revlog,

1122

baserev,

1122

baserev,

1123

delta,

1123

delta,

1124

revinfo.p1,

1124

revinfo.p1,

1125

revinfo.p2,

1125

revinfo.p2,

1126

revinfo.flags,

1126

revinfo.flags,

1127

revinfo.node,

1127

revinfo.node,

1128

)

1128

)

1129

return fulltext

1129

return fulltext

1130

1131

def _builddeltadiff(self, base, revinfo):

1131

def _builddeltadiff(self, base, revinfo):

1132

revlog = self.revlog

1132

revlog = self.revlog

1133

t = self.buildtext(revinfo)

1133

t = self.buildtext(revinfo)

1134

if revlog.iscensored(base):

1134

if revlog.iscensored(base):

1135

# deltas based on a censored revision must replace the

1135

# deltas based on a censored revision must replace the

1136

# full content in one patch, so delta works everywhere

1136

# full content in one patch, so delta works everywhere

1137

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

1137

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

1138

delta = header + t

1138

delta = header + t

1139

else:

1139

else:

1140

ptext = revlog.rawdata(base)

1140

ptext = revlog.rawdata(base)

1141

delta = mdiff.textdiff(ptext, t)

1141

delta = mdiff.textdiff(ptext, t)

1142

1143

return delta

1143

return delta

1144

1145

def _builddeltainfo(self, revinfo, base, target_rev=None):

1145

def _builddeltainfo(self, revinfo, base, target_rev=None):

1146

# can we use the cached delta?

1146

# can we use the cached delta?

1147

revlog = self.revlog

1147

revlog = self.revlog

1148

chainbase = revlog.chainbase(base)

1148

chainbase = revlog.chainbase(base)

1149

if revlog.delta_config.general_delta:

1149

if revlog.delta_config.general_delta:

1150

deltabase = base

1150

deltabase = base

1151

else:

1151

else:

1152

if target_rev is not None and base != target_rev - 1:

1152

if target_rev is not None and base != target_rev - 1:

1153

msg = (

1153

msg = (

1154

b'general delta cannot use delta for something else '

1154

b'general delta cannot use delta for something else '

1155

b'than `prev`: %d<-%d'

1155

b'than `prev`: %d<-%d'

1156

)

1156

)

1157

msg %= (base, target_rev)

1157

msg %= (base, target_rev)

1158

raise error.ProgrammingError(msg)

1158

raise error.ProgrammingError(msg)

1159

deltabase = chainbase

1159

deltabase = chainbase

1160

snapshotdepth = None

1160

snapshotdepth = None

1161

if revlog._sparserevlog and deltabase == nullrev:

1161

if revlog._sparserevlog and deltabase == nullrev:

1162

snapshotdepth = 0

1162

snapshotdepth = 0

1163

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

1163

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

1164

# A delta chain should always be one full snapshot,

1164

# A delta chain should always be one full snapshot,

1165

# zero or more semi-snapshots, and zero or more deltas

1165

# zero or more semi-snapshots, and zero or more deltas

1166

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

1166

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

1167

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

1167

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

1168

snapshotdepth = len(revlog._deltachain(deltabase)[0])

1168

snapshotdepth = len(revlog._deltachain(deltabase)[0])

1169

delta = None

1169

delta = None

1170

if revinfo.cachedelta:

1170

if revinfo.cachedelta:

1171

cachebase = revinfo.cachedelta[0]

1171

cachebase = revinfo.cachedelta[0]

1172

# check if the diff still apply

1172

# check if the diff still apply

1173

currentbase = cachebase

1173

currentbase = cachebase

1174

while (

1174

while (

1175

currentbase != nullrev

1175

currentbase != nullrev

1176

and currentbase != base

1176

and currentbase != base

1177

and self.revlog.length(currentbase) == 0

1177

and self.revlog.length(currentbase) == 0

1178

):

1178

):

1179

currentbase = self.revlog.deltaparent(currentbase)

1179

currentbase = self.revlog.deltaparent(currentbase)

1180

if self.revlog._lazydelta and currentbase == base:

1180

if self.revlog._lazydelta and currentbase == base:

1181

delta = revinfo.cachedelta[1]

1181

delta = revinfo.cachedelta[1]

1182

if delta is None:

1182

if delta is None:

1183

delta = self._builddeltadiff(base, revinfo)

1183

delta = self._builddeltadiff(base, revinfo)

1184

if self._debug_search:

1184

if self._debug_search:

1185

msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"

1185

msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"

1186

msg %= len(delta)

1186

msg %= len(delta)

1187

self._write_debug(msg)

1187

self._write_debug(msg)

1188

# snapshotdept need to be neither None nor 0 level snapshot

1188

# snapshotdept need to be neither None nor 0 level snapshot

1189

if revlog.upperboundcomp is not None and snapshotdepth:

1189

if revlog.upperboundcomp is not None and snapshotdepth:

1190

lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp

1190

lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp

1191

snapshotlimit = revinfo.textlen >> snapshotdepth

1191

snapshotlimit = revinfo.textlen >> snapshotdepth

1192

if self._debug_search:

1192

if self._debug_search:

1193

msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"

1193

msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"

1194

msg %= lowestrealisticdeltalen

1194

msg %= lowestrealisticdeltalen

1195

self._write_debug(msg)

1195

self._write_debug(msg)

1196

if snapshotlimit < lowestrealisticdeltalen:

1196

if snapshotlimit < lowestrealisticdeltalen:

1197

if self._debug_search:

1197

if self._debug_search:

1198

msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"

1198

msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"

1199

self._write_debug(msg)

1199

self._write_debug(msg)

1200

return None

1200

return None

1201

if revlog.length(base) < lowestrealisticdeltalen:

1201

if revlog.length(base) < lowestrealisticdeltalen:

1202

if self._debug_search:

1202

if self._debug_search:

1203

msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"

1203

msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"

1204

self._write_debug(msg)

1204

self._write_debug(msg)

1205

return None

1205

return None

1206

header, data = revlog.compress(delta)

1206

header, data = revlog.compress(delta)

1207

deltalen = len(header) + len(data)

1207

deltalen = len(header) + len(data)

1208

offset = revlog.end(len(revlog) - 1)

1208

offset = revlog.end(len(revlog) - 1)

1209

dist = deltalen + offset - revlog.start(chainbase)

1209

dist = deltalen + offset - revlog.start(chainbase)

1210

chainlen, compresseddeltalen = revlog._chaininfo(base)

1210

chainlen, compresseddeltalen = revlog._chaininfo(base)

1211

chainlen += 1

1211

chainlen += 1

1212

compresseddeltalen += deltalen

1212

compresseddeltalen += deltalen

1213

1214

return _deltainfo(

1214

return _deltainfo(

1215

dist,

1215

dist,

1216

deltalen,

1216

deltalen,

1217

(header, data),

1217

(header, data),

1218

deltabase,

1218

deltabase,

1219

chainbase,

1219

chainbase,

1220

chainlen,

1220

chainlen,

1221

compresseddeltalen,

1221

compresseddeltalen,

1222

snapshotdepth,

1222

snapshotdepth,

1223

)

1223

)

1224

1225

def _fullsnapshotinfo(self, revinfo, curr):

1225

def _fullsnapshotinfo(self, revinfo, curr):

1226

rawtext = self.buildtext(revinfo)

1226

rawtext = self.buildtext(revinfo)

1227

data = self.revlog.compress(rawtext)

1227

data = self.revlog.compress(rawtext)

1228

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

1228

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

1229

deltabase = chainbase = curr

1229

deltabase = chainbase = curr

1230

snapshotdepth = 0

1230

snapshotdepth = 0

1231

chainlen = 1

1231

chainlen = 1

1232

1233

return _deltainfo(

1233

return _deltainfo(

1234

dist,

1234

dist,

1235

deltalen,

1235

deltalen,

1236

data,

1236

data,

1237

deltabase,

1237

deltabase,

1238

chainbase,

1238

chainbase,

1239

chainlen,

1239

chainlen,

1240

compresseddeltalen,

1240

compresseddeltalen,

1241

snapshotdepth,

1241

snapshotdepth,

1242

)

1242

)

1243

1244

def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):

1244

def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):

1245

"""Find an acceptable delta against a candidate revision

1245

"""Find an acceptable delta against a candidate revision

1246

1247

revinfo: information about the revision (instance of _revisioninfo)

1247

revinfo: information about the revision (instance of _revisioninfo)

1248

1249

Returns the first acceptable candidate revision, as ordered by

1249

Returns the first acceptable candidate revision, as ordered by

1250

_candidategroups

1250

_candidategroups

1251

1252

If no suitable deltabase is found, we return delta info for a full

1252

If no suitable deltabase is found, we return delta info for a full

1253

snapshot.

1253

snapshot.

1254

1255

`excluded_bases` is an optional set of revision that cannot be used as

1255

`excluded_bases` is an optional set of revision that cannot be used as

1256

a delta base. Use this to recompute delta suitable in censor or strip

1256

a delta base. Use this to recompute delta suitable in censor or strip

1257

context.

1257

context.

1258

"""

1258

"""

1259

if target_rev is None:

1259

if target_rev is None:

1260

target_rev = len(self.revlog)

1260

target_rev = len(self.revlog)

1261

1262

gather_debug = self._gather_debug

1262

gather_debug = self._gather_debug

1263

cachedelta = revinfo.cachedelta

1263

cachedelta = revinfo.cachedelta

1264

revlog = self.revlog

1264

revlog = self.revlog

1265

p1r = p2r = None

1265

p1r = p2r = None

1266

1267

if excluded_bases is None:

1267

if excluded_bases is None:

1268

excluded_bases = set()

1268

excluded_bases = set()

1269

1270

if gather_debug:

1270

if gather_debug:

1271

start = util.timer()

1271

start = util.timer()

1272

dbg = self._one_dbg_data()

1272

dbg = self._one_dbg_data()

1273

dbg['revision'] = target_rev

1273

dbg['revision'] = target_rev

1274

target_revlog = b"UNKNOWN"

1274

target_revlog = b"UNKNOWN"

1275

target_type = self.revlog.target[0]

1275

target_type = self.revlog.target[0]

1276

target_key = self.revlog.target[1]

1276

target_key = self.revlog.target[1]

1277

if target_type == KIND_CHANGELOG:

1277

if target_type == KIND_CHANGELOG:

1278

target_revlog = b'CHANGELOG:'

1278

target_revlog = b'CHANGELOG:'

1279

elif target_type == KIND_MANIFESTLOG:

1279

elif target_type == KIND_MANIFESTLOG:

1280

target_revlog = b'MANIFESTLOG:'

1280

target_revlog = b'MANIFESTLOG:'

1281

if target_key:

1281

if target_key:

1282

target_revlog += b'%s:' % target_key

1282

target_revlog += b'%s:' % target_key

1283

elif target_type == KIND_FILELOG:

1283

elif target_type == KIND_FILELOG:

1284

target_revlog = b'FILELOG:'

1284

target_revlog = b'FILELOG:'

1285

if target_key:

1285

if target_key:

1286

target_revlog += b'%s:' % target_key

1286

target_revlog += b'%s:' % target_key

1287

dbg['target-revlog'] = target_revlog

1287

dbg['target-revlog'] = target_revlog

1288

p1r = revlog.rev(revinfo.p1)

1288

p1r = revlog.rev(revinfo.p1)

1289

p2r = revlog.rev(revinfo.p2)

1289

p2r = revlog.rev(revinfo.p2)

1290

if p1r != nullrev:

1290

if p1r != nullrev:

1291

p1_chain_len = revlog._chaininfo(p1r)[0]

1291

p1_chain_len = revlog._chaininfo(p1r)[0]

1292

else:

1292

else:

1293

p1_chain_len = -1

1293

p1_chain_len = -1

1294

if p2r != nullrev:

1294

if p2r != nullrev:

1295

p2_chain_len = revlog._chaininfo(p2r)[0]

1295

p2_chain_len = revlog._chaininfo(p2r)[0]

1296

else:

1296

else:

1297

p2_chain_len = -1

1297

p2_chain_len = -1

1298

dbg['p1-chain-len'] = p1_chain_len

1298

dbg['p1-chain-len'] = p1_chain_len

1299

dbg['p2-chain-len'] = p2_chain_len

1299

dbg['p2-chain-len'] = p2_chain_len

1300

1301

# 1) if the revision is empty, no amount of delta can beat it

1301

# 1) if the revision is empty, no amount of delta can beat it

1302

#

1302

#

1303

# 2) no delta for flag processor revision (see "candelta" for why)

1303

# 2) no delta for flag processor revision (see "candelta" for why)

1304

# not calling candelta since only one revision needs test, also to

1304

# not calling candelta since only one revision needs test, also to

1305

# avoid overhead fetching flags again.

1305

# avoid overhead fetching flags again.

1306

if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

1306

if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

1307

deltainfo = self._fullsnapshotinfo(revinfo, target_rev)

1307

deltainfo = self._fullsnapshotinfo(revinfo, target_rev)

1308

if gather_debug:

1308

if gather_debug:

1309

end = util.timer()

1309

end = util.timer()

1310

dbg['duration'] = end - start

1310

dbg['duration'] = end - start

1311

dbg[

1311

dbg[

1312

'delta-base'

1312

'delta-base'

1313

] = deltainfo.base # pytype: disable=attribute-error

1313

] = deltainfo.base # pytype: disable=attribute-error

1314

dbg['search_round_count'] = 0

1314

dbg['search_round_count'] = 0

1315

dbg['using-cached-base'] = False

1315

dbg['using-cached-base'] = False

1316

dbg['delta_try_count'] = 0

1316

dbg['delta_try_count'] = 0

1317

dbg['type'] = b"full"

1317

dbg['type'] = b"full"

1318

dbg['snapshot-depth'] = 0

1318

dbg['snapshot-depth'] = 0

1319

self._dbg_process_data(dbg)

1319

self._dbg_process_data(dbg)

1320

return deltainfo

1320

return deltainfo

1321

1322

deltainfo = None

1322

deltainfo = None

1323

1324

# If this source delta are to be forcibly reuse, let us comply early.

1324

# If this source delta are to be forcibly reuse, let us comply early.

1325

if (

1325

if (

1326

revlog.delta_config.general_delta

1326

revlog.delta_config.general_delta

1327

and revinfo.cachedelta is not None

1327

and revinfo.cachedelta is not None

1328

and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE

1328

and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE

1329

):

1329

):

1330

base = revinfo.cachedelta[0]

1330

base = revinfo.cachedelta[0]

1331

if base == nullrev:

1331

if base == nullrev:

1332

dbg_type = b"full"

1332

dbg_type = b"full"

1333

deltainfo = self._fullsnapshotinfo(revinfo, target_rev)

1333

deltainfo = self._fullsnapshotinfo(revinfo, target_rev)

1334

if gather_debug:

1334

if gather_debug:

1335

snapshotdepth = 0

1335

snapshotdepth = 0

1336

elif base not in excluded_bases:

1336

elif base not in excluded_bases:

1337

delta = revinfo.cachedelta[1]

1337

delta = revinfo.cachedelta[1]

1338

header, data = revlog.compress(delta)

1338

header, data = revlog.compress(delta)

1339

deltalen = len(header) + len(data)

1339

deltalen = len(header) + len(data)

1340

if gather_debug:

1340

if gather_debug:

1341

offset = revlog.end(len(revlog) - 1)

1341

offset = revlog.end(len(revlog) - 1)

1342

chainbase = revlog.chainbase(base)

1342

chainbase = revlog.chainbase(base)

1343

distance = deltalen + offset - revlog.start(chainbase)

1343

distance = deltalen + offset - revlog.start(chainbase)

1344

chainlen, compresseddeltalen = revlog._chaininfo(base)

1344

chainlen, compresseddeltalen = revlog._chaininfo(base)

1345

chainlen += 1

1345

chainlen += 1

1346

compresseddeltalen += deltalen

1346

compresseddeltalen += deltalen

1347

if base == p1r or base == p2r:

1347

if base == p1r or base == p2r:

1348

dbg_type = b"delta"

1348

dbg_type = b"delta"

1349

snapshotdepth = None

1349

snapshotdepth = None

1350

elif not revlog.issnapshot(base):

1350

elif not revlog.issnapshot(base):

1351

snapshotdepth = None

1351

snapshotdepth = None

1352

else:

1352

else:

1353

dbg_type = b"snapshot"

1353

dbg_type = b"snapshot"

1354

snapshotdepth = revlog.snapshotdepth(base) + 1

1354

snapshotdepth = revlog.snapshotdepth(base) + 1

1355

else:

1355

else:

1356

distance = None

1356

distance = None

1357

chainbase = None

1357

chainbase = None

1358

chainlen = None

1358

chainlen = None

1359

compresseddeltalen = None

1359

compresseddeltalen = None

1360

snapshotdepth = None

1360

snapshotdepth = None

1361

deltainfo = _deltainfo(

1361

deltainfo = _deltainfo(

1362

distance=distance,

1362

distance=distance,

1363

deltalen=deltalen,

1363

deltalen=deltalen,

1364

data=(header, data),

1364

data=(header, data),

1365

base=base,

1365

base=base,

1366

chainbase=chainbase,

1366

chainbase=chainbase,

1367

chainlen=chainlen,

1367

chainlen=chainlen,

1368

compresseddeltalen=compresseddeltalen,

1368

compresseddeltalen=compresseddeltalen,

1369

snapshotdepth=snapshotdepth,

1369

snapshotdepth=snapshotdepth,

1370

)

1370

)

1371

1372

if deltainfo is not None:

1372

if deltainfo is not None:

1373

if gather_debug:

1373

if gather_debug:

1374

end = util.timer()

1374

end = util.timer()

1375

dbg['duration'] = end - start

1375

dbg['duration'] = end - start

1376

dbg[

1376

dbg[

1377

'delta-base'

1377

'delta-base'

1378

] = deltainfo.base # pytype: disable=attribute-error

1378

] = deltainfo.base # pytype: disable=attribute-error

1379

dbg['search_round_count'] = 0

1379

dbg['search_round_count'] = 0

1380

dbg['using-cached-base'] = True

1380

dbg['using-cached-base'] = True

1381

dbg['delta_try_count'] = 0

1381

dbg['delta_try_count'] = 0

1382

dbg['type'] = b"full"

1382

dbg['type'] = b"full"

1383

if snapshotdepth is None:

1383

if snapshotdepth is None:

1384

dbg['snapshot-depth'] = 0

1384

dbg['snapshot-depth'] = 0

1385

else:

1385

else:

1386

dbg['snapshot-depth'] = snapshotdepth

1386

dbg['snapshot-depth'] = snapshotdepth

1387

self._dbg_process_data(dbg)

1387

self._dbg_process_data(dbg)

1388

return deltainfo

1388

return deltainfo

1389

1390

# count the number of different delta we tried (for debug purpose)

1390

# count the number of different delta we tried (for debug purpose)

1391

dbg_try_count = 0

1391

dbg_try_count = 0

1392

# count the number of "search round" we did. (for debug purpose)

1392

# count the number of "search round" we did. (for debug purpose)

1393

dbg_try_rounds = 0

1393

dbg_try_rounds = 0

1394

dbg_type = b'unknown'

1394

dbg_type = b'unknown'

1395

1396

if p1r is None:

1396

if p1r is None:

1397

p1r = revlog.rev(revinfo.p1)

1397

p1r = revlog.rev(revinfo.p1)

1398

p2r = revlog.rev(revinfo.p2)

1398

p2r = revlog.rev(revinfo.p2)

1399

1400

if self._debug_search:

1400

if self._debug_search:

1401

msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"

1401

msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"

1402

msg %= target_rev

1402

msg %= target_rev

1403

self._write_debug(msg)

1403

self._write_debug(msg)

1404

1405

groups = _candidategroups(

1405

groups = _candidategroups(

1406

self.revlog,

1406

self.revlog,

1407

revinfo.textlen,

1407

revinfo.textlen,

1408

p1r,

1408

p1r,

1409

p2r,

1409

p2r,

1410

cachedelta,

1410

cachedelta,

1411

excluded_bases,

1411

excluded_bases,

1412

target_rev,

1412

target_rev,

1413

snapshot_cache=self._snapshot_cache,

1413

snapshot_cache=self._snapshot_cache,

1414

)

1414

)

1415

candidaterevs = next(groups)

1415

candidaterevs = next(groups)

1416

while candidaterevs is not None:

1416

while candidaterevs is not None:

1417

dbg_try_rounds += 1

1417

dbg_try_rounds += 1

1418

if self._debug_search:

1418

if self._debug_search:

1419

prev = None

1419

prev = None

1420

if deltainfo is not None:

1420

if deltainfo is not None:

1421

prev = deltainfo.base

1421

prev = deltainfo.base

1422

1423

if (

1423

if (

1424

cachedelta is not None

1424

cachedelta is not None

1425

and len(candidaterevs) == 1

1425

and len(candidaterevs) == 1

1426

and cachedelta[0] in candidaterevs

1426

and cachedelta[0] in candidaterevs

1427

):

1427

):

1428

round_type = b"cached-delta"

1428

round_type = b"cached-delta"

1429

elif p1r in candidaterevs or p2r in candidaterevs:

1429

elif p1r in candidaterevs or p2r in candidaterevs:

1430

round_type = b"parents"

1430

round_type = b"parents"

1431

elif prev is not None and all(c < prev for c in candidaterevs):

1431

elif prev is not None and all(c < prev for c in candidaterevs):

1432

round_type = b"refine-down"

1432

round_type = b"refine-down"

1433

elif prev is not None and all(c > prev for c in candidaterevs):

1433

elif prev is not None and all(c > prev for c in candidaterevs):

1434

round_type = b"refine-up"

1434

round_type = b"refine-up"

1435

else:

1435

else:

1436

round_type = b"search-down"

1436

round_type = b"search-down"

1437

msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"

1437

msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"

1438

msg %= (dbg_try_rounds, len(candidaterevs), round_type)

1438

msg %= (dbg_try_rounds, len(candidaterevs), round_type)

1439

self._write_debug(msg)

1439

self._write_debug(msg)

1440

nominateddeltas = []

1440

nominateddeltas = []

1441

if deltainfo is not None:

1441

if deltainfo is not None:

1442

if self._debug_search:

1442

if self._debug_search:

1443

msg = (

1443

msg = (

1444

b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"

1444

b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"

1445

)

1445

)

1446

msg %= (deltainfo.base, deltainfo.deltalen)

1446

msg %= (deltainfo.base, deltainfo.deltalen)

1447

self._write_debug(msg)

1447

self._write_debug(msg)

1448

# if we already found a good delta,

1448

# if we already found a good delta,

1449

# challenge it against refined candidates

1449

# challenge it against refined candidates

1450

nominateddeltas.append(deltainfo)

1450

nominateddeltas.append(deltainfo)

1451

for candidaterev in candidaterevs:

1451

for candidaterev in candidaterevs:

1452

if self._debug_search:

1452

if self._debug_search:

1453

msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"

1453

msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"

1454

msg %= candidaterev

1454

msg %= candidaterev

1455

self._write_debug(msg)

1455

self._write_debug(msg)

1456

candidate_type = None

1456

candidate_type = None

1457

if candidaterev == p1r:

1457

if candidaterev == p1r:

1458

candidate_type = b"p1"

1458

candidate_type = b"p1"

1459

elif candidaterev == p2r:

1459

elif candidaterev == p2r:

1460

candidate_type = b"p2"

1460

candidate_type = b"p2"

1461

elif self.revlog.issnapshot(candidaterev):

1461

elif self.revlog.issnapshot(candidaterev):

1462

candidate_type = b"snapshot-%d"

1462

candidate_type = b"snapshot-%d"

1463

candidate_type %= self.revlog.snapshotdepth(

1463

candidate_type %= self.revlog.snapshotdepth(

1464

candidaterev

1464

candidaterev

1465

)

1465

)

1466

1467

if candidate_type is not None:

1467

if candidate_type is not None:

1468

msg = b"DBG-DELTAS-SEARCH: type=%s\n"

1468

msg = b"DBG-DELTAS-SEARCH: type=%s\n"

1469

msg %= candidate_type

1469

msg %= candidate_type

1470

self._write_debug(msg)

1470

self._write_debug(msg)

1471

msg = b"DBG-DELTAS-SEARCH: size=%d\n"

1471

msg = b"DBG-DELTAS-SEARCH: size=%d\n"

1472

msg %= self.revlog.length(candidaterev)

1472

msg %= self.revlog.length(candidaterev)

1473

self._write_debug(msg)

1473

self._write_debug(msg)

1474

msg = b"DBG-DELTAS-SEARCH: base=%d\n"

1474

msg = b"DBG-DELTAS-SEARCH: base=%d\n"

1475

msg %= self.revlog.deltaparent(candidaterev)

1475

msg %= self.revlog.deltaparent(candidaterev)

1476

self._write_debug(msg)

1476

self._write_debug(msg)

1477

1478

dbg_try_count += 1

1478

dbg_try_count += 1

1479

1480

if self._debug_search:

1480

if self._debug_search:

1481

delta_start = util.timer()

1481

delta_start = util.timer()

1482

candidatedelta = self._builddeltainfo(

1482

candidatedelta = self._builddeltainfo(

1483

revinfo,

1483

revinfo,

1484

candidaterev,

1484

candidaterev,

1485

target_rev=target_rev,

1485

target_rev=target_rev,

1486

)

1486

)

1487

if self._debug_search:

1487

if self._debug_search:

1488

delta_end = util.timer()

1488

delta_end = util.timer()

1489

msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"

1489

msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"

1490

msg %= delta_end - delta_start

1490

msg %= delta_end - delta_start

1491

self._write_debug(msg)

1491

self._write_debug(msg)

1492

if candidatedelta is not None:

1492

if candidatedelta is not None:

1493

if is_good_delta_info(self.revlog, candidatedelta, revinfo):

1493

if is_good_delta_info(self.revlog, candidatedelta, revinfo):

1494

if self._debug_search:

1494

if self._debug_search:

1495

msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"

1495

msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"

1496

msg %= candidatedelta.deltalen

1496

msg %= candidatedelta.deltalen

1497

self._write_debug(msg)

1497

self._write_debug(msg)

1498

nominateddeltas.append(candidatedelta)

1498

nominateddeltas.append(candidatedelta)

1499

elif self._debug_search:

1499

elif self._debug_search:

1500

msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"

1500

msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"

1501

msg %= candidatedelta.deltalen

1501

msg %= candidatedelta.deltalen

1502

self._write_debug(msg)

1502

self._write_debug(msg)

1503

elif self._debug_search:

1503

elif self._debug_search:

1504

msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"

1504

msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"

1505

self._write_debug(msg)

1505

self._write_debug(msg)

1506

if nominateddeltas:

1506

if nominateddeltas:

1507

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

1507

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

1508

if deltainfo is not None:

1508

if deltainfo is not None:

1509

candidaterevs = groups.send(deltainfo.base)

1509

candidaterevs = groups.send(deltainfo.base)

1510

else:

1510

else:

1511

candidaterevs = next(groups)

1511

candidaterevs = next(groups)

1512

1513

if deltainfo is None:

1513

if deltainfo is None:

1514

dbg_type = b"full"

1514

dbg_type = b"full"

1515

deltainfo = self._fullsnapshotinfo(revinfo, target_rev)

1515

deltainfo = self._fullsnapshotinfo(revinfo, target_rev)

1516

elif deltainfo.snapshotdepth: # pytype: disable=attribute-error

1516

elif deltainfo.snapshotdepth: # pytype: disable=attribute-error

1517

dbg_type = b"snapshot"

1517

dbg_type = b"snapshot"

1518

else:

1518

else:

1519

dbg_type = b"delta"

1519

dbg_type = b"delta"

1520

1521

if gather_debug:

1521

if gather_debug:

1522

end = util.timer()

1522

end = util.timer()

1523

if dbg_type == b'full':

1523

if dbg_type == b'full':

1524

used_cached = (

1524

used_cached = (

1525

cachedelta is not None

1525

cachedelta is not None

1526

and dbg_try_rounds == 0

1526

and dbg_try_rounds == 0

1527

and dbg_try_count == 0

1527

and dbg_try_count == 0

1528

and cachedelta[0] == nullrev

1528

and cachedelta[0] == nullrev

1529

)

1529

)

1530

else:

1530

else:

1531

used_cached = (

1531

used_cached = (

1532

cachedelta is not None

1532

cachedelta is not None

1533

and dbg_try_rounds == 1

1533

and dbg_try_rounds == 1

1534

and dbg_try_count == 1

1534

and dbg_try_count == 1

1535

and deltainfo.base == cachedelta[0]

1535

and deltainfo.base == cachedelta[0]

1536

)

1536

)

1537

dbg['duration'] = end - start

1537

dbg['duration'] = end - start

1538

dbg[

1538

dbg[

1539

'delta-base'

1539

'delta-base'

1540

] = deltainfo.base # pytype: disable=attribute-error

1540

] = deltainfo.base # pytype: disable=attribute-error

1541

dbg['search_round_count'] = dbg_try_rounds

1541

dbg['search_round_count'] = dbg_try_rounds

1542

dbg['using-cached-base'] = used_cached

1542

dbg['using-cached-base'] = used_cached

1543

dbg['delta_try_count'] = dbg_try_count

1543

dbg['delta_try_count'] = dbg_try_count

1544

dbg['type'] = dbg_type

1544

dbg['type'] = dbg_type

1545

if (

1545

if (

1546

deltainfo.snapshotdepth # pytype: disable=attribute-error

1546

deltainfo.snapshotdepth # pytype: disable=attribute-error

1547

is not None

1547

is not None

1548

):

1548

):

1549

dbg[

1549

dbg[

1550

'snapshot-depth'

1550

'snapshot-depth'

1551

] = deltainfo.snapshotdepth # pytype: disable=attribute-error

1551

] = deltainfo.snapshotdepth # pytype: disable=attribute-error

1552

else:

1552

else:

1553

dbg['snapshot-depth'] = 0

1553

dbg['snapshot-depth'] = 0

1554

self._dbg_process_data(dbg)

1554

self._dbg_process_data(dbg)

1555

return deltainfo

1555

return deltainfo

1556

1557

def _one_dbg_data(self):

1557

def _one_dbg_data(self):

1558

return {

1558

return {

1559

'duration': None,

1559

'duration': None,

1560

'revision': None,

1560

'revision': None,

1561

'delta-base': None,

1561

'delta-base': None,

1562

'search_round_count': None,

1562

'search_round_count': None,

1563

'using-cached-base': None,

1563

'using-cached-base': None,

1564

'delta_try_count': None,

1564

'delta_try_count': None,

1565

'type': None,

1565

'type': None,

1566

'p1-chain-len': None,

1566

'p1-chain-len': None,

1567

'p2-chain-len': None,

1567

'p2-chain-len': None,

1568

'snapshot-depth': None,

1568

'snapshot-depth': None,

1569

'target-revlog': None,

1569

'target-revlog': None,

1570

}

1570

}

1571

1572

def _dbg_process_data(self, dbg):

1572

def _dbg_process_data(self, dbg):

1573

if self._debug_info is not None:

1573

if self._debug_info is not None:

1574

self._debug_info.append(dbg)

1574

self._debug_info.append(dbg)

1575

1576

if self._write_debug is not None:

1576

if self._write_debug is not None:

1577

msg = (

1577

msg = (

1578

b"DBG-DELTAS:"

1578

b"DBG-DELTAS:"

1579

b" %-12s"

1579

b" %-12s"

1580

b" rev=%d:"

1580

b" rev=%d:"

1581

b" delta-base=%d"

1581

b" delta-base=%d"

1582

b" is-cached=%d"

1582

b" is-cached=%d"

1583

b" - search-rounds=%d"

1583

b" - search-rounds=%d"

1584

b" try-count=%d"

1584

b" try-count=%d"

1585

b" - delta-type=%-6s"

1585

b" - delta-type=%-6s"

1586

b" snap-depth=%d"

1586

b" snap-depth=%d"

1587

b" - p1-chain-length=%d"

1587

b" - p1-chain-length=%d"

1588

b" p2-chain-length=%d"

1588

b" p2-chain-length=%d"

1589

b" - duration=%f"

1589

b" - duration=%f"

1590

b"\n"

1590

b"\n"

1591

)

1591

)

1592

msg %= (

1592

msg %= (

1593

dbg["target-revlog"],

1593

dbg["target-revlog"],

1594

dbg["revision"],

1594

dbg["revision"],

1595

dbg["delta-base"],

1595

dbg["delta-base"],

1596

dbg["using-cached-base"],

1596

dbg["using-cached-base"],

1597

dbg["search_round_count"],

1597

dbg["search_round_count"],

1598

dbg["delta_try_count"],

1598

dbg["delta_try_count"],

1599

dbg["type"],

1599

dbg["type"],

1600

dbg["snapshot-depth"],

1600

dbg["snapshot-depth"],

1601

dbg["p1-chain-len"],

1601

dbg["p1-chain-len"],

1602

dbg["p2-chain-len"],

1602

dbg["p2-chain-len"],

1603

dbg["duration"],

1603

dbg["duration"],

1604

)

1604

)

1605

self._write_debug(msg)

1605

self._write_debug(msg)

1606

1607

1608

def delta_compression(default_compression_header, deltainfo):

1608

def delta_compression(default_compression_header, deltainfo):

1609

"""return (COMPRESSION_MODE, deltainfo)

1609

"""return (COMPRESSION_MODE, deltainfo)

1610

1611

used by revlog v2+ format to dispatch between PLAIN and DEFAULT

1611

used by revlog v2+ format to dispatch between PLAIN and DEFAULT

1612

compression.

1612

compression.

1613

"""

1613

"""

1614

h, d = deltainfo.data

1614

h, d = deltainfo.data

1615

compression_mode = COMP_MODE_INLINE

1615

compression_mode = COMP_MODE_INLINE

1616

if not h and not d:

1616

if not h and not d:

1617

# not data to store at all... declare them uncompressed

1617

# not data to store at all... declare them uncompressed

1618

compression_mode = COMP_MODE_PLAIN

1618

compression_mode = COMP_MODE_PLAIN

1619

elif not h:

1619

elif not h:

1620

t = d[0:1]

1620

t = d[0:1]

1621

if t == b'\0':

1621

if t == b'\0':

1622

compression_mode = COMP_MODE_PLAIN

1622

compression_mode = COMP_MODE_PLAIN

1623

elif t == default_compression_header:

1623

elif t == default_compression_header:

1624

compression_mode = COMP_MODE_DEFAULT

1624

compression_mode = COMP_MODE_DEFAULT

1625

elif h == b'u':

1625

elif h == b'u':

1626

# we have a more efficient way to declare uncompressed

1626

# we have a more efficient way to declare uncompressed

1627

h = b''

1627

h = b''

1628

compression_mode = COMP_MODE_PLAIN

1628

compression_mode = COMP_MODE_PLAIN

1629

deltainfo = drop_u_compression(deltainfo)

1629

deltainfo = drop_u_compression(deltainfo)

1630

return compression_mode, deltainfo

1630

return compression_mode, deltainfo

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlogdeltas.py - Logic around delta computation for revlog
             #
             # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
             # Copyright 2018 Octobus <contact@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Helper class to compute deltas stored inside revlogs"""
             import collections
             import struct
             # import stuff from node for others to import from revlog
             from ..node import nullrev
             from ..i18n import _
             from .constants import (
                 COMP_MODE_DEFAULT,
                 COMP_MODE_INLINE,
                 COMP_MODE_PLAIN,
                 DELTA_BASE_REUSE_FORCE,
                 DELTA_BASE_REUSE_NO,
                 KIND_CHANGELOG,
                 KIND_FILELOG,
                 KIND_MANIFESTLOG,
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from ..thirdparty import attr
             from .. import (
                 error,
                 mdiff,
                 util,
             )
             from . import flagutil
             # maximum <delta-chain-data>/<revision-text-length> ratio
             LIMIT_DELTA2TEXT = 2
             class _testrevlog:
                 """minimalist fake revlog to use in doctests"""
                 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
                     """data is an list of revision payload boundaries"""
                     from .. import revlog
                     self._data = data
                     self._srdensitythreshold = density
                     self._srmingapsize = mingap
                     self.data_config = revlog.DataConfig()
                     self.data_config.sr_density_threshold = density
                     self.data_config.sr_min_gap_size = mingap
                     self.delta_config = revlog.DeltaConfig()
                     self.feature_config = revlog.FeatureConfig()
                     self._snapshot = set(snapshot)
                     self.index = None
                 def start(self, rev):
                     if rev == nullrev:
                         return 0
                     if rev == 0:
                         return 0
                     return self._data[rev - 1]
                 def end(self, rev):
                     if rev == nullrev:
                         return 0
                     return self._data[rev]
                 def length(self, rev):
                     return self.end(rev) - self.start(rev)
                 def __len__(self):
                     return len(self._data)
                 def issnapshot(self, rev):
                     if rev == nullrev:
                         return True
                     return rev in self._snapshot
             def slicechunk(revlog, revs, targetsize=None):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
                 `revlog._srmingapsize` is skipped.
                 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
                 For consistency with other slicing choice, this limit won't go lower than
                 `revlog._srmingapsize`.
                 If individual revisions chunk are larger than this limit, they will still
                 be raised individually.
                 >>> data = [
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ]
                 >>> revlog = _testrevlog(data, snapshot=range(16))
                 >>> list(slicechunk(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(slicechunk(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 Slicing with a maximum chunk size
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
                 [[0], [11], [13], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
                 [[0], [11], [13, 15]]
                 Slicing involving nullrev
                 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
                 [[-1, 0], [11], [13, 15]]
                 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
                 [[-1], [13], [15]]
                 """
                 if targetsize is not None:
                     targetsize = max(targetsize, revlog._srmingapsize)
                 # targetsize should not be specified when evaluating delta candidates:
                 # * targetsize is used to ensure we stay within specification when reading,
                 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
                 if densityslicing is None:
                     densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
                 for chunk in densityslicing(
                     revs, revlog._srdensitythreshold, revlog._srmingapsize
                 ):
                     for subchunk in _slicechunktosize(revlog, chunk, targetsize):
                         yield subchunk
             def _slicechunktosize(revlog, revs, targetsize=None):
                 """slice revs to match the target size
                 This is intended to be used on chunk that density slicing selected by that
                 are still too large compared to the read garantee of revlog. This might
                 happens when "minimal gap size" interrupted the slicing or when chain are
                 built in a way that create large blocks next to each other.
                 >>> data = [
                 ...  3,  #0 (3)
                 ...  5,  #1 (2)
                 ...  6,  #2 (1)
                 ...  8,  #3 (2)
                 ...  8,  #4 (empty)
                 ...  11, #5 (3)
                 ...  12, #6 (1)
                 ...  13, #7 (1)
                 ...  14, #8 (1)
                 ... ]
                 == All snapshots cases ==
                 >>> revlog = _testrevlog(data, snapshot=range(9))
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1, 2], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 == No Snapshot cases ==
                 >>> revlog = _testrevlog(data)
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1], [2, 3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [4, 5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 == mixed case ==
                 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
                 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
                 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
                 """
                 assert targetsize is None or 0 <= targetsize
                 startdata = revlog.start(revs[0])
                 enddata = revlog.end(revs[-1])
                 fullspan = enddata - startdata
                 if targetsize is None or fullspan <= targetsize:
                     yield revs
                     return
                 startrevidx = 0
                 endrevidx = 1
                 iterrevs = enumerate(revs)
                 next(iterrevs)  # skip first rev.
                 # first step: get snapshots out of the way
                 for idx, r in iterrevs:
                     span = revlog.end(r) - startdata
                     snapshot = revlog.issnapshot(r)
                     if span <= targetsize and snapshot:
                         endrevidx = idx + 1
                     else:
                         chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                         if chunk:
                             yield chunk
                         startrevidx = idx
                         startdata = revlog.start(r)
                         endrevidx = idx + 1
                     if not snapshot:
                         break
                 # for the others, we use binary slicing to quickly converge toward valid
                 # chunks (otherwise, we might end up looking for start/end of many
                 # revisions). This logic is not looking for the perfect slicing point, it
                 # focuses on quickly converging toward valid chunks.
                 nbitem = len(revs)
                 while (enddata - startdata) > targetsize:
                     endrevidx = nbitem
                     if nbitem - startrevidx <= 1:
                         break  # protect against individual chunk larger than limit
                     localenddata = revlog.end(revs[endrevidx - 1])
                     span = localenddata - startdata
                     while span > targetsize:
                         if endrevidx - startrevidx <= 1:
                             break  # protect against individual chunk larger than limit
                         endrevidx -= (endrevidx - startrevidx) // 2
                         localenddata = revlog.end(revs[endrevidx - 1])
                         span = localenddata - startdata
                     chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                     if chunk:
                         yield chunk
                     startrevidx = endrevidx
                     startdata = revlog.start(revs[startrevidx])
                 chunk = _trimchunk(revlog, revs, startrevidx)
                 if chunk:
                     yield chunk
             def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
                 skipped.
                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])
                 >>> list(_slicechunktodensity(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(_slicechunktodensity(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           mingapsize=20))
                 [[1, 2, 3, 5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95))
                 [[1, 2], [5], [8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95, mingapsize=12))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 """
                 start = revlog.start
                 length = revlog.length
                 if len(revs) <= 1:
                     yield revs
                     return
                 deltachainspan = segmentspan(revlog, revs)
                 if deltachainspan < mingapsize:
                     yield revs
                     return
                 readdata = deltachainspan
                 chainpayload = sum(length(r) for r in revs)
                 if deltachainspan:
                     density = chainpayload / float(deltachainspan)
                 else:
                     density = 1.0
                 if density >= targetdensity:
                     yield revs
                     return
                 # Store the gaps in a heap to have them sorted by decreasing size
                 gaps = []
                 prevend = None
                 for i, rev in enumerate(revs):
                     revstart = start(rev)
                     revlen = length(rev)
                     # Skip empty revisions to form larger holes
                     if revlen == 0:
                         continue
                     if prevend is not None:
                         gapsize = revstart - prevend
                         # only consider holes that are large enough
                         if gapsize > mingapsize:
                             gaps.append((gapsize, i))
                     prevend = revstart + revlen
                 # sort the gaps to pop them from largest to small
                 gaps.sort()
                 # Collect the indices of the largest holes until the density is acceptable
                 selected = []
                 while gaps and density < targetdensity:
                     gapsize, gapidx = gaps.pop()
                     selected.append(gapidx)
                     # the gap sizes are stored as negatives to be sorted decreasingly
                     # by the heap
                     readdata -= gapsize
                     if readdata > 0:
                         density = chainpayload / float(readdata)
                     else:
                         density = 1.0
                 selected.sort()
                 # Cut the revs at collected indices
                 previdx = 0
                 for idx in selected:
                     chunk = _trimchunk(revlog, revs, previdx, idx)
                     if chunk:
                         yield chunk
                     previdx = idx
                 chunk = _trimchunk(revlog, revs, previdx)
                 if chunk:
                     yield chunk
             def _trimchunk(revlog, revs, startidx, endidx=None):
                 """returns revs[startidx:endidx] without empty trailing revs
                 Doctest Setup
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ...  21, #5
                 ...  21, #6 (empty)
                 ... ])
                 Contiguous cases:
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
                 [0, 1, 2, 3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
                 [0, 1, 2, 3, 4]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
                 [0, 1, 2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
                 [2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
                 [3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
                 [3, 4]
                 Discontiguous cases:
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
                 [1, 3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
                 [1]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
                 [3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
                 [3, 5]
                 """
                 length = revlog.length
                 if endidx is None:
                     endidx = len(revs)
                 # If we have a non-emtpy delta candidate, there are nothing to trim
                 if revs[endidx - 1] < len(revlog):
                     # Trim empty revs at the end, except the very first revision of a chain
                     while (
                         endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
                     ):
                         endidx -= 1
                 return revs[startidx:endidx]
             def segmentspan(revlog, revs):
                 """Get the byte span of a segment of revisions
                 revs is a sorted array of revision numbers
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ... ])
                 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
                 >>> segmentspan(revlog, [0, 4])
                 >>> segmentspan(revlog, [3, 4])
                 >>> segmentspan(revlog, [1, 2, 3,])
                 >>> segmentspan(revlog, [1, 3])
                 """
                 if not revs:
                     return 0
                 end = revlog.end(revs[-1])
                 return end - revlog.start(revs[0])
             def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
                 """build full text from a (base, delta) pair and other metadata"""
                 # special case deltas which replace entire base; no need to decode
                 # base revision. this neatly avoids censored bases, which throw when
                 # they're decoded.
                 hlen = struct.calcsize(b">lll")
                 if delta[:hlen] == mdiff.replacediffheader(
                     revlog.rawsize(baserev), len(delta) - hlen
                 ):
                     fulltext = delta[hlen:]
                 else:
                     # deltabase is rawtext before changed by flag processors, which is
                     # equivalent to non-raw text
                     basetext = revlog.revision(baserev)
                     fulltext = mdiff.patch(basetext, delta)
                 try:
                     validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
                     if validatehash:
                         revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
                     if flags & REVIDX_ISCENSORED:
                         raise error.StorageError(
                             _(b'node %s is not censored') % expectednode
                         )
                 except error.CensoredNodeError:
                     # must pass the censored index flag to add censored revisions
                     if not flags & REVIDX_ISCENSORED:
                         raise
                 return fulltext
             @attr.s(slots=True, frozen=True)
             class _deltainfo:
                 distance = attr.ib()
                 deltalen = attr.ib()
                 data = attr.ib()
                 base = attr.ib()
                 chainbase = attr.ib()
                 chainlen = attr.ib()
                 compresseddeltalen = attr.ib()
                 snapshotdepth = attr.ib()
             def drop_u_compression(delta):
                 """turn into a "u" (no-compression) into no-compression without header
                 This is useful for revlog format that has better compression method.
                 """
                 assert delta.data[0] == b'u', delta.data[0]
                 return _deltainfo(
                     delta.distance,
                     delta.deltalen - 1,
                     (b'', delta.data[1]),
                     delta.base,
                     delta.chainbase,
                     delta.chainlen,
                     delta.compresseddeltalen,
                     delta.snapshotdepth,
                 )
             def is_good_delta_info(revlog, deltainfo, revinfo):
                 """Returns True if the given delta is good. Good means that it is within
                 the disk span, disk size, and chain length bounds that we know to be
                 performant."""
                 if deltainfo is None:
                     return False
                 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
                 # we should never end up asking such question. Adding the assert as a
                 # safe-guard to detect anything that would be fishy in this regard.
                 assert (
                     revinfo.cachedelta is None
                     or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
                     or not revlog.delta_config.general_delta
                 )
                 # - 'deltainfo.distance' is the distance from the base revision --
                 #   bounding it limits the amount of I/O we need to do.
                 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
                 #   deltas we need to apply -- bounding it limits the amount of CPU
                 #   we consume.
                 textlen = revinfo.textlen
                 defaultmax = textlen * 4
                 maxdist = revlog._maxdeltachainspan
                 if not maxdist:
                     maxdist = deltainfo.distance  # ensure the conditional pass
                 maxdist = max(maxdist, defaultmax)
                 # Bad delta from read span:
                 #
                 #   If the span of data read is larger than the maximum allowed.
                 #
                 #   In the sparse-revlog case, we rely on the associated "sparse reading"
                 #   to avoid issue related to the span of data. In theory, it would be
                 #   possible to build pathological revlog where delta pattern would lead
                 #   to too many reads. However, they do not happen in practice at all. So
                 #   we skip the span check entirely.
                 if not revlog._sparserevlog and maxdist < deltainfo.distance:
                     return False
                 # Bad delta from new delta size:
                 #
                 #   If the delta size is larger than the target text, storing the
                 #   delta will be inefficient.
                 if textlen < deltainfo.deltalen:
                     return False
                 # Bad delta from cumulated payload size:
                 #
                 #   If the sum of delta get larger than K * target text length.
                 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
                     return False
                 # Bad delta from chain length:
                 #
                 #   If the number of delta in the chain gets too high.
                 if (
                     revlog.delta_config.max_chain_len
                     and revlog.delta_config.max_chain_len < deltainfo.chainlen
                 ):
                     return False
                 # bad delta from intermediate snapshot size limit
                 #
                 #   If an intermediate snapshot size is higher than the limit.  The
                 #   limit exist to prevent endless chain of intermediate delta to be
                 #   created.
                 if (
                     deltainfo.snapshotdepth is not None
                     and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
                 ):
                     return False
                 # bad delta if new intermediate snapshot is larger than the previous
                 # snapshot
                 if (
                     deltainfo.snapshotdepth
                     and revlog.length(deltainfo.base) < deltainfo.deltalen
                 ):
                     return False
                 return True
             # If a revision's full text is that much bigger than a base candidate full
             # text's, it is very unlikely that it will produce a valid delta. We no longer
             # consider these candidates.
             LIMIT_BASE2TEXT = 500
             def _candidategroups(
                 revlog,
                 textlen,
                 p1,
                 p2,
                 cachedelta,
                 excluded_bases=None,
                 target_rev=None,
                 snapshot_cache=None,
             ):
                 """Provides group of revision to be tested as delta base
                 This top level function focus on emitting groups with unique and worthwhile
                 content. See _raw_candidate_groups for details about the group order.
                 """
                 # should we try to build a delta?
                 if not (len(revlog) and revlog._storedeltachains):
                     yield None
                     return
                 if target_rev is None:
                     target_rev = len(revlog)
                 if not revlog.delta_config.general_delta:
                     # before general delta, there is only one possible delta base
                     yield (target_rev - 1,)
                     yield None
                     return
                 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
                 # we should never end up asking such question. Adding the assert as a
                 # safe-guard to detect anything that would be fishy in this regard.
                 assert (
                     cachedelta is None
                     or cachedelta[2] != DELTA_BASE_REUSE_FORCE
                     or not revlog.delta_config.general_delta
                 )
                 deltalength = revlog.length
                 deltaparent = revlog.deltaparent
                 sparse = revlog._sparserevlog
                 good = None
                 deltas_limit = textlen * LIMIT_DELTA2TEXT
-                group_chunk_size = revlog._candidate_group_chunk_size
+                group_chunk_size = revlog.delta_config.candidate_group_chunk_size
                 tested = {nullrev}
                 candidates = _refinedgroups(
                     revlog,
                     p1,
                     p2,
                     cachedelta,
                     snapshot_cache=snapshot_cache,
                 )
                 while True:
                     temptative = candidates.send(good)
                     if temptative is None:
                         break
                     group = []
                     for rev in temptative:
                         # skip over empty delta (no need to include them in a chain)
                         while not (rev == nullrev or rev in tested or deltalength(rev)):
                             tested.add(rev)
                             rev = deltaparent(rev)
                         # no need to try a delta against nullrev, this will be done as a
                         # last resort.
                         if rev == nullrev:
                             continue
                         # filter out revision we tested already
                         if rev in tested:
                             continue
                         # an higher authority deamed the base unworthy (e.g. censored)
                         if excluded_bases is not None and rev in excluded_bases:
                             tested.add(rev)
                             continue
                         # We are in some recomputation cases and that rev is too high in
                         # the revlog
                         if target_rev is not None and rev >= target_rev:
                             tested.add(rev)
                             continue
                         # filter out delta base that will never produce good delta
                         if deltas_limit < revlog.length(rev):
                             tested.add(rev)
                             continue
                         if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                             tested.add(rev)
                             continue
                         # no delta for rawtext-changing revs (see "candelta" for why)
                         if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                             tested.add(rev)
                             continue
                         # If we reach here, we are about to build and test a delta.
                         # The delta building process will compute the chaininfo in all
                         # case, since that computation is cached, it is fine to access it
                         # here too.
                         chainlen, chainsize = revlog._chaininfo(rev)
                         # if chain will be too long, skip base
                         if (
                             revlog.delta_config.max_chain_len
                             and chainlen >= revlog.delta_config.max_chain_len
                         ):
                             tested.add(rev)
                             continue
                         # if chain already have too much data, skip base
                         if deltas_limit < chainsize:
                             tested.add(rev)
                             continue
                         if sparse and revlog.upperboundcomp is not None:
                             maxcomp = revlog.upperboundcomp
                             basenotsnap = (p1, p2, nullrev)
                             if rev not in basenotsnap and revlog.issnapshot(rev):
                                 snapshotdepth = revlog.snapshotdepth(rev)
                                 # If text is significantly larger than the base, we can
                                 # expect the resulting delta to be proportional to the size
                                 # difference
                                 revsize = revlog.rawsize(rev)
                                 rawsizedistance = max(textlen - revsize, 0)
                                 # use an estimate of the compression upper bound.
                                 lowestrealisticdeltalen = rawsizedistance // maxcomp
                                 # check the absolute constraint on the delta size
                                 snapshotlimit = textlen >> snapshotdepth
                                 if snapshotlimit < lowestrealisticdeltalen:
                                     # delta lower bound is larger than accepted upper bound
                                     tested.add(rev)
                                     continue
                                 # check the relative constraint on the delta size
                                 revlength = revlog.length(rev)
                                 if revlength < lowestrealisticdeltalen:
                                     # delta probable lower bound is larger than target base
                                     tested.add(rev)
                                     continue
                         group.append(rev)
                     if group:
                         # When the size of the candidate group is big, it can result in a
                         # quite significant performance impact. To reduce this, we can send
                         # them in smaller batches until the new batch does not provide any
                         # improvements.
                         #
                         # This might reduce the overall efficiency of the compression in
                         # some corner cases, but that should also prevent very pathological
                         # cases from being an issue. (eg. 20 000 candidates).
                         #
                         # XXX note that the ordering of the group becomes important as it
                         # now impacts the final result. The current order is unprocessed
                         # and can be improved.
                         if group_chunk_size == 0:
                             tested.update(group)
                             good = yield tuple(group)
                         else:
                             prev_good = good
                             for start in range(0, len(group), group_chunk_size):
                                 sub_group = group[start : start + group_chunk_size]
                                 tested.update(sub_group)
                                 good = yield tuple(sub_group)
                                 if prev_good == good:
                                     break
                 yield None
             def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
                 good = None
                 # First we try to reuse a the delta contained in the bundle.
                 # (or from the source revlog)
                 #
                 # This logic only applies to general delta repositories and can be disabled
                 # through configuration. Disabling reuse source delta is useful when
                 # we want to make sure we recomputed "optimal" deltas.
                 debug_info = None
                 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
                     # Assume what we received from the server is a good choice
                     # build delta will reuse the cache
                     if debug_info is not None:
                         debug_info['cached-delta.tested'] += 1
                     good = yield (cachedelta[0],)
                     if good is not None:
                         if debug_info is not None:
                             debug_info['cached-delta.accepted'] += 1
                         yield None
                         return
                 if snapshot_cache is None:
                     snapshot_cache = SnapshotCache()
                 groups = _rawgroups(
                     revlog,
                     p1,
                     p2,
                     cachedelta,
                     snapshot_cache,
                 )
                 for candidates in groups:
                     good = yield candidates
                     if good is not None:
                         break
                 # If sparse revlog is enabled, we can try to refine the available deltas
                 if not revlog._sparserevlog:
                     yield None
                     return
                 # if we have a refinable value, try to refine it
                 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
                     # refine snapshot down
                     previous = None
                     while previous != good:
                         previous = good
                         base = revlog.deltaparent(good)
                         if base == nullrev:
                             break
                         good = yield (base,)
                     # refine snapshot up
                     if not snapshot_cache.snapshots:
                         snapshot_cache.update(revlog, good + 1)
                     previous = None
                     while good != previous:
                         previous = good
                         children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
                         good = yield children
                 if debug_info is not None:
                     if good is None:
                         debug_info['no-solution'] += 1
                 yield None
             def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
                 """Provides group of revision to be tested as delta base
                 This lower level function focus on emitting delta theorically interresting
                 without looking it any practical details.
                 The group order aims at providing fast or small candidates first.
                 """
                 # Why search for delta base if we cannot use a delta base ?
                 assert revlog.delta_config.general_delta
                 # also see issue6056
                 sparse = revlog._sparserevlog
                 curr = len(revlog)
                 prev = curr - 1
                 deltachain = lambda rev: revlog._deltachain(rev)[0]
                 # exclude already lazy tested base if any
                 parents = [p for p in (p1, p2) if p != nullrev]
                 if not revlog.delta_config.delta_both_parents and len(parents) == 2:
                     parents.sort()
                     # To minimize the chance of having to build a fulltext,
                     # pick first whichever parent is closest to us (max rev)
                     yield (parents[1],)
                     # then the other one (min rev) if the first did not fit
                     yield (parents[0],)
                 elif len(parents) > 0:
                     # Test all parents (1 or 2), and keep the best candidate
                     yield parents
                 if sparse and parents:
                     if snapshot_cache is None:
                         # map: base-rev: [snapshot-revs]
                         snapshot_cache = SnapshotCache()
                     # See if we can use an existing snapshot in the parent chains to use as
                     # a base for a new intermediate-snapshot
                     #
                     # search for snapshot in parents delta chain
                     # map: snapshot-level: snapshot-rev
                     parents_snaps = collections.defaultdict(set)
                     candidate_chains = [deltachain(p) for p in parents]
                     for chain in candidate_chains:
                         for idx, s in enumerate(chain):
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     snapfloor = min(parents_snaps[0]) + 1
                     snapshot_cache.update(revlog, snapfloor)
                     # search for the highest "unrelated" revision
                     #
                     # Adding snapshots used by "unrelated" revision increase the odd we
                     # reuse an independant, yet better snapshot chain.
                     #
                     # XXX instead of building a set of revisions, we could lazily enumerate
                     # over the chains. That would be more efficient, however we stick to
                     # simple code for now.
                     all_revs = set()
                     for chain in candidate_chains:
                         all_revs.update(chain)
                     other = None
                     for r in revlog.revs(prev, snapfloor):
                         if r not in all_revs:
                             other = r
                             break
                     if other is not None:
                         # To avoid unfair competition, we won't use unrelated intermediate
                         # snapshot that are deeper than the ones from the parent delta
                         # chain.
                         max_depth = max(parents_snaps.keys())
                         chain = deltachain(other)
                         for depth, s in enumerate(chain):
                             if s < snapfloor:
                                 continue
                             if max_depth < depth:
                                 break
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[depth].add(s)
                     # Test them as possible intermediate snapshot base
                     # We test them from highest to lowest level. High level one are more
                     # likely to result in small delta
                     floor = None
                     for idx, snaps in sorted(parents_snaps.items(), reverse=True):
                         siblings = set()
                         for s in snaps:
                             siblings.update(snapshot_cache.snapshots[s])
                         # Before considering making a new intermediate snapshot, we check
                         # if an existing snapshot, children of base we consider, would be
                         # suitable.
                         #
                         # It give a change to reuse a delta chain "unrelated" to the
                         # current revision instead of starting our own. Without such
                         # re-use, topological branches would keep reopening new chains.
                         # Creating more and more snapshot as the repository grow.
                         if floor is not None:
                             # We only do this for siblings created after the one in our
                             # parent's delta chain. Those created before has less chances
                             # to be valid base since our ancestors had to create a new
                             # snapshot.
                             siblings = [r for r in siblings if floor < r]
                         yield tuple(sorted(siblings))
                         # then test the base from our parent's delta chain.
                         yield tuple(sorted(snaps))
                         floor = min(snaps)
                     # No suitable base found in the parent chain, search if any full
                     # snapshots emitted since parent's base would be a suitable base for an
                     # intermediate snapshot.
                     #
                     # It give a chance to reuse a delta chain unrelated to the current
                     # revisions instead of starting our own. Without such re-use,
                     # topological branches would keep reopening new full chains. Creating
                     # more and more snapshot as the repository grow.
                     full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
                     yield tuple(sorted(full))
                 if not sparse:
                     # other approach failed try against prev to hopefully save us a
                     # fulltext.
                     yield (prev,)
             class SnapshotCache:
                 __slots__ = ('snapshots', '_start_rev', '_end_rev')
                 def __init__(self):
                     self.snapshots = collections.defaultdict(set)
                     self._start_rev = None
                     self._end_rev = None
                 def update(self, revlog, start_rev=0):
                     """find snapshots from start_rev to tip"""
                     nb_revs = len(revlog)
                     end_rev = nb_revs - 1
                     if start_rev > end_rev:
                         return  # range is empty
                     if self._start_rev is None:
                         assert self._end_rev is None
                         self._update(revlog, start_rev, end_rev)
                     elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
                         if start_rev < self._start_rev:
                             self._update(revlog, start_rev, self._start_rev - 1)
                         if self._end_rev < end_rev:
                             self._update(revlog, self._end_rev + 1, end_rev)
                     if self._start_rev is None:
                         assert self._end_rev is None
                         self._end_rev = end_rev
                         self._start_rev = start_rev
                     else:
                         self._start_rev = min(self._start_rev, start_rev)
                         self._end_rev = max(self._end_rev, end_rev)
                     assert self._start_rev <= self._end_rev, (
                         self._start_rev,
                         self._end_rev,
                     )
                 def _update(self, revlog, start_rev, end_rev):
                     """internal method that actually do update content"""
                     assert self._start_rev is None or (
                         start_rev < self._start_rev or start_rev > self._end_rev
                     ), (self._start_rev, self._end_rev, start_rev, end_rev)
                     assert self._start_rev is None or (
                         end_rev < self._start_rev or end_rev > self._end_rev
                     ), (self._start_rev, self._end_rev, start_rev, end_rev)
                     cache = self.snapshots
                     if hasattr(revlog.index, 'findsnapshots'):
                         revlog.index.findsnapshots(cache, start_rev, end_rev)
                     else:
                         deltaparent = revlog.deltaparent
                         issnapshot = revlog.issnapshot
                         for rev in revlog.revs(start_rev, end_rev):
                             if issnapshot(rev):
                                 cache[deltaparent(rev)].add(rev)
             class deltacomputer:
                 def __init__(
                     self,
                     revlog,
                     write_debug=None,
                     debug_search=False,
                     debug_info=None,
                 ):
                     self.revlog = revlog
                     self._write_debug = write_debug
                     if write_debug is None:
                         self._debug_search = False
                     else:
                         self._debug_search = debug_search
                     self._debug_info = debug_info
                     self._snapshot_cache = SnapshotCache()
                 @property
                 def _gather_debug(self):
                     return self._write_debug is not None or self._debug_info is not None
                 def buildtext(self, revinfo):
                     """Builds a fulltext version of a revision
                     revinfo: revisioninfo instance that contains all needed info
                     """
                     btext = revinfo.btext
                     if btext[0] is not None:
                         return btext[0]
                     revlog = self.revlog
                     cachedelta = revinfo.cachedelta
                     baserev = cachedelta[0]
                     delta = cachedelta[1]
                     fulltext = btext[0] = _textfromdelta(
                         revlog,
                         baserev,
                         delta,
                         revinfo.p1,
                         revinfo.p2,
                         revinfo.flags,
                         revinfo.node,
                     )
                     return fulltext
                 def _builddeltadiff(self, base, revinfo):
                     revlog = self.revlog
                     t = self.buildtext(revinfo)
                     if revlog.iscensored(base):
                         # deltas based on a censored revision must replace the
                         # full content in one patch, so delta works everywhere
                         header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
                         delta = header + t
                     else:
                         ptext = revlog.rawdata(base)
                         delta = mdiff.textdiff(ptext, t)
                     return delta
                 def _builddeltainfo(self, revinfo, base, target_rev=None):
                     # can we use the cached delta?
                     revlog = self.revlog
                     chainbase = revlog.chainbase(base)
                     if revlog.delta_config.general_delta:
                         deltabase = base
                     else:
                         if target_rev is not None and base != target_rev - 1:
                             msg = (
                                 b'general delta cannot use delta for something else '
                                 b'than `prev`: %d<-%d'
                             )
                             msg %= (base, target_rev)
                             raise error.ProgrammingError(msg)
                         deltabase = chainbase
                     snapshotdepth = None
                     if revlog._sparserevlog and deltabase == nullrev:
                         snapshotdepth = 0
                     elif revlog._sparserevlog and revlog.issnapshot(deltabase):
                         # A delta chain should always be one full snapshot,
                         # zero or more semi-snapshots, and zero or more deltas
                         p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
                         if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                             snapshotdepth = len(revlog._deltachain(deltabase)[0])
                     delta = None
                     if revinfo.cachedelta:
                         cachebase = revinfo.cachedelta[0]
                         # check if the diff still apply
                         currentbase = cachebase
                         while (
                             currentbase != nullrev
                             and currentbase != base
                             and self.revlog.length(currentbase) == 0
                         ):
                             currentbase = self.revlog.deltaparent(currentbase)
                         if self.revlog._lazydelta and currentbase == base:
                             delta = revinfo.cachedelta[1]
                     if delta is None:
                         delta = self._builddeltadiff(base, revinfo)
                     if self._debug_search:
                         msg = b"DBG-DELTAS-SEARCH:     uncompressed-delta-size=%d\n"
                         msg %= len(delta)
                         self._write_debug(msg)
                     # snapshotdept need to be neither None nor 0 level snapshot
                     if revlog.upperboundcomp is not None and snapshotdepth:
                         lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
                         snapshotlimit = revinfo.textlen >> snapshotdepth
                         if self._debug_search:
                             msg = b"DBG-DELTAS-SEARCH:     projected-lower-size=%d\n"
                             msg %= lowestrealisticdeltalen
                             self._write_debug(msg)
                         if snapshotlimit < lowestrealisticdeltalen:
                             if self._debug_search:
                                 msg = b"DBG-DELTAS-SEARCH:     DISCARDED (snapshot limit)\n"
                                 self._write_debug(msg)
                             return None
                         if revlog.length(base) < lowestrealisticdeltalen:
                             if self._debug_search:
                                 msg = b"DBG-DELTAS-SEARCH:     DISCARDED (prev size)\n"
                                 self._write_debug(msg)
                             return None
                     header, data = revlog.compress(delta)
                     deltalen = len(header) + len(data)
                     offset = revlog.end(len(revlog) - 1)
                     dist = deltalen + offset - revlog.start(chainbase)
                     chainlen, compresseddeltalen = revlog._chaininfo(base)
                     chainlen += 1
                     compresseddeltalen += deltalen
                     return _deltainfo(
                         dist,
                         deltalen,
                         (header, data),
                         deltabase,
                         chainbase,
                         chainlen,
                         compresseddeltalen,
                         snapshotdepth,
                     )
                 def _fullsnapshotinfo(self, revinfo, curr):
                     rawtext = self.buildtext(revinfo)
                     data = self.revlog.compress(rawtext)
                     compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
                     deltabase = chainbase = curr
                     snapshotdepth = 0
                     chainlen = 1
                     return _deltainfo(
                         dist,
                         deltalen,
                         data,
                         deltabase,
                         chainbase,
                         chainlen,
                         compresseddeltalen,
                         snapshotdepth,
                     )
                 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
                     """Find an acceptable delta against a candidate revision
                     revinfo: information about the revision (instance of _revisioninfo)
                     Returns the first acceptable candidate revision, as ordered by
                     _candidategroups
                     If no suitable deltabase is found, we return delta info for a full
                     snapshot.
                     `excluded_bases` is an optional set of revision that cannot be used as
                     a delta base. Use this to recompute delta suitable in censor or strip
                     context.
                     """
                     if target_rev is None:
                         target_rev = len(self.revlog)
                     gather_debug = self._gather_debug
                     cachedelta = revinfo.cachedelta
                     revlog = self.revlog
                     p1r = p2r = None
                     if excluded_bases is None:
                         excluded_bases = set()
                     if gather_debug:
                         start = util.timer()
                         dbg = self._one_dbg_data()
                         dbg['revision'] = target_rev
                         target_revlog = b"UNKNOWN"
                         target_type = self.revlog.target[0]
                         target_key = self.revlog.target[1]
                         if target_type == KIND_CHANGELOG:
                             target_revlog = b'CHANGELOG:'
                         elif target_type == KIND_MANIFESTLOG:
                             target_revlog = b'MANIFESTLOG:'
                             if target_key:
                                 target_revlog += b'%s:' % target_key
                         elif target_type == KIND_FILELOG:
                             target_revlog = b'FILELOG:'
                             if target_key:
                                 target_revlog += b'%s:' % target_key
                         dbg['target-revlog'] = target_revlog
                         p1r = revlog.rev(revinfo.p1)
                         p2r = revlog.rev(revinfo.p2)
                         if p1r != nullrev:
                             p1_chain_len = revlog._chaininfo(p1r)[0]
                         else:
                             p1_chain_len = -1
                         if p2r != nullrev:
                             p2_chain_len = revlog._chaininfo(p2r)[0]
                         else:
                             p2_chain_len = -1
                         dbg['p1-chain-len'] = p1_chain_len
                         dbg['p2-chain-len'] = p2_chain_len
                     # 1) if the revision is empty, no amount of delta can beat it
                     #
                     # 2) no delta for flag processor revision (see "candelta" for why)
                     # not calling candelta since only one revision needs test, also to
                     # avoid overhead fetching flags again.
                     if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
                         deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
                         if gather_debug:
                             end = util.timer()
                             dbg['duration'] = end - start
                             dbg[
                                 'delta-base'
                             ] = deltainfo.base  # pytype: disable=attribute-error
                             dbg['search_round_count'] = 0
                             dbg['using-cached-base'] = False
                             dbg['delta_try_count'] = 0
                             dbg['type'] = b"full"
                             dbg['snapshot-depth'] = 0
                             self._dbg_process_data(dbg)
                         return deltainfo
                     deltainfo = None
                     # If this source delta are to be forcibly reuse, let us comply early.
                     if (
                         revlog.delta_config.general_delta
                         and revinfo.cachedelta is not None
                         and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
                     ):
                         base = revinfo.cachedelta[0]
                         if base == nullrev:
                             dbg_type = b"full"
                             deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
                             if gather_debug:
                                 snapshotdepth = 0
                         elif base not in excluded_bases:
                             delta = revinfo.cachedelta[1]
                             header, data = revlog.compress(delta)
                             deltalen = len(header) + len(data)
                             if gather_debug:
                                 offset = revlog.end(len(revlog) - 1)
                                 chainbase = revlog.chainbase(base)
                                 distance = deltalen + offset - revlog.start(chainbase)
                                 chainlen, compresseddeltalen = revlog._chaininfo(base)
                                 chainlen += 1
                                 compresseddeltalen += deltalen
                                 if base == p1r or base == p2r:
                                     dbg_type = b"delta"
                                     snapshotdepth = None
                                 elif not revlog.issnapshot(base):
                                     snapshotdepth = None
                                 else:
                                     dbg_type = b"snapshot"
                                     snapshotdepth = revlog.snapshotdepth(base) + 1
                             else:
                                 distance = None
                                 chainbase = None
                                 chainlen = None
                                 compresseddeltalen = None
                                 snapshotdepth = None
                             deltainfo = _deltainfo(
                                 distance=distance,
                                 deltalen=deltalen,
                                 data=(header, data),
                                 base=base,
                                 chainbase=chainbase,
                                 chainlen=chainlen,
                                 compresseddeltalen=compresseddeltalen,
                                 snapshotdepth=snapshotdepth,
                             )
                         if deltainfo is not None:
                             if gather_debug:
                                 end = util.timer()
                                 dbg['duration'] = end - start
                                 dbg[
                                     'delta-base'
                                 ] = deltainfo.base  # pytype: disable=attribute-error
                                 dbg['search_round_count'] = 0
                                 dbg['using-cached-base'] = True
                                 dbg['delta_try_count'] = 0
                                 dbg['type'] = b"full"
                                 if snapshotdepth is None:
                                     dbg['snapshot-depth'] = 0
                                 else:
                                     dbg['snapshot-depth'] = snapshotdepth
                                 self._dbg_process_data(dbg)
                             return deltainfo
                     # count the number of different delta we tried (for debug purpose)
                     dbg_try_count = 0
                     # count the number of "search round" we did. (for debug purpose)
                     dbg_try_rounds = 0
                     dbg_type = b'unknown'
                     if p1r is None:
                         p1r = revlog.rev(revinfo.p1)
                         p2r = revlog.rev(revinfo.p2)
                     if self._debug_search:
                         msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
                         msg %= target_rev
                         self._write_debug(msg)
                     groups = _candidategroups(
                         self.revlog,
                         revinfo.textlen,
                         p1r,
                         p2r,
                         cachedelta,
                         excluded_bases,
                         target_rev,
                         snapshot_cache=self._snapshot_cache,
                     )
                     candidaterevs = next(groups)
                     while candidaterevs is not None:
                         dbg_try_rounds += 1
                         if self._debug_search:
                             prev = None
                             if deltainfo is not None:
                                 prev = deltainfo.base
                             if (
                                 cachedelta is not None
                                 and len(candidaterevs) == 1
                                 and cachedelta[0] in candidaterevs
                             ):
                                 round_type = b"cached-delta"
                             elif p1r in candidaterevs or p2r in candidaterevs:
                                 round_type = b"parents"
                             elif prev is not None and all(c < prev for c in candidaterevs):
                                 round_type = b"refine-down"
                             elif prev is not None and all(c > prev for c in candidaterevs):
                                 round_type = b"refine-up"
                             else:
                                 round_type = b"search-down"
                             msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                             msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                             self._write_debug(msg)
                         nominateddeltas = []
                         if deltainfo is not None:
                             if self._debug_search:
                                 msg = (
                                     b"DBG-DELTAS-SEARCH:   CONTENDER: rev=%d - length=%d\n"
                                 )
                                 msg %= (deltainfo.base, deltainfo.deltalen)
                                 self._write_debug(msg)
                             # if we already found a good delta,
                             # challenge it against refined candidates
                             nominateddeltas.append(deltainfo)
                         for candidaterev in candidaterevs:
                             if self._debug_search:
                                 msg = b"DBG-DELTAS-SEARCH:   CANDIDATE: rev=%d\n"
                                 msg %= candidaterev
                                 self._write_debug(msg)
                                 candidate_type = None
                                 if candidaterev == p1r:
                                     candidate_type = b"p1"
                                 elif candidaterev == p2r:
                                     candidate_type = b"p2"
                                 elif self.revlog.issnapshot(candidaterev):
                                     candidate_type = b"snapshot-%d"
                                     candidate_type %= self.revlog.snapshotdepth(
                                         candidaterev
                                     )
                                 if candidate_type is not None:
                                     msg = b"DBG-DELTAS-SEARCH:     type=%s\n"
                                     msg %= candidate_type
                                     self._write_debug(msg)
                                 msg = b"DBG-DELTAS-SEARCH:     size=%d\n"
                                 msg %= self.revlog.length(candidaterev)
                                 self._write_debug(msg)
                                 msg = b"DBG-DELTAS-SEARCH:     base=%d\n"
                                 msg %= self.revlog.deltaparent(candidaterev)
                                 self._write_debug(msg)
                             dbg_try_count += 1
                             if self._debug_search:
                                 delta_start = util.timer()
                             candidatedelta = self._builddeltainfo(
                                 revinfo,
                                 candidaterev,
                                 target_rev=target_rev,
                             )
                             if self._debug_search:
                                 delta_end = util.timer()
                                 msg = b"DBG-DELTAS-SEARCH:     delta-search-time=%f\n"
                                 msg %= delta_end - delta_start
                                 self._write_debug(msg)
                             if candidatedelta is not None:
                                 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                                     if self._debug_search:
                                         msg = b"DBG-DELTAS-SEARCH:     DELTA: length=%d (GOOD)\n"
                                         msg %= candidatedelta.deltalen
                                         self._write_debug(msg)
                                     nominateddeltas.append(candidatedelta)
                                 elif self._debug_search:
                                     msg = b"DBG-DELTAS-SEARCH:     DELTA: length=%d (BAD)\n"
                                     msg %= candidatedelta.deltalen
                                     self._write_debug(msg)
                             elif self._debug_search:
                                 msg = b"DBG-DELTAS-SEARCH:     NO-DELTA\n"
                                 self._write_debug(msg)
                         if nominateddeltas:
                             deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                         if deltainfo is not None:
                             candidaterevs = groups.send(deltainfo.base)
                         else:
                             candidaterevs = next(groups)
                     if deltainfo is None:
                         dbg_type = b"full"
                         deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
                     elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
                         dbg_type = b"snapshot"
                     else:
                         dbg_type = b"delta"
                     if gather_debug:
                         end = util.timer()
                         if dbg_type == b'full':
                             used_cached = (
                                 cachedelta is not None
                                 and dbg_try_rounds == 0
                                 and dbg_try_count == 0
                                 and cachedelta[0] == nullrev
                             )
                         else:
                             used_cached = (
                                 cachedelta is not None
                                 and dbg_try_rounds == 1
                                 and dbg_try_count == 1
                                 and deltainfo.base == cachedelta[0]
                             )
                         dbg['duration'] = end - start
                         dbg[
                             'delta-base'
                         ] = deltainfo.base  # pytype: disable=attribute-error
                         dbg['search_round_count'] = dbg_try_rounds
                         dbg['using-cached-base'] = used_cached
                         dbg['delta_try_count'] = dbg_try_count
                         dbg['type'] = dbg_type
                         if (
                             deltainfo.snapshotdepth  # pytype: disable=attribute-error
                             is not None
                         ):
                             dbg[
                                 'snapshot-depth'
                             ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
                         else:
                             dbg['snapshot-depth'] = 0
                         self._dbg_process_data(dbg)
                     return deltainfo
                 def _one_dbg_data(self):
                     return {
                         'duration': None,
                         'revision': None,
                         'delta-base': None,
                         'search_round_count': None,
                         'using-cached-base': None,
                         'delta_try_count': None,
                         'type': None,
                         'p1-chain-len': None,
                         'p2-chain-len': None,
                         'snapshot-depth': None,
                         'target-revlog': None,
                     }
                 def _dbg_process_data(self, dbg):
                     if self._debug_info is not None:
                         self._debug_info.append(dbg)
                     if self._write_debug is not None:
                         msg = (
                             b"DBG-DELTAS:"
                             b" %-12s"
                             b" rev=%d:"
                             b" delta-base=%d"
                             b" is-cached=%d"
                             b" - search-rounds=%d"
                             b" try-count=%d"
                             b" - delta-type=%-6s"
                             b" snap-depth=%d"
                             b" - p1-chain-length=%d"
                             b" p2-chain-length=%d"
                             b" - duration=%f"
                             b"\n"
                         )
                         msg %= (
                             dbg["target-revlog"],
                             dbg["revision"],
                             dbg["delta-base"],
                             dbg["using-cached-base"],
                             dbg["search_round_count"],
                             dbg["delta_try_count"],
                             dbg["type"],
                             dbg["snapshot-depth"],
                             dbg["p1-chain-len"],
                             dbg["p2-chain-len"],
                             dbg["duration"],
                         )
                         self._write_debug(msg)
             def delta_compression(default_compression_header, deltainfo):
                 """return (COMPRESSION_MODE, deltainfo)
                 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
                 compression.
                 """
                 h, d = deltainfo.data
                 compression_mode = COMP_MODE_INLINE
                 if not h and not d:
                     # not data to store at all... declare them uncompressed
                     compression_mode = COMP_MODE_PLAIN
                 elif not h:
                     t = d[0:1]
                     if t == b'\0':
                         compression_mode = COMP_MODE_PLAIN
                     elif t == default_compression_header:
                         compression_mode = COMP_MODE_DEFAULT
                 elif h == b'u':
                     # we have a more efficient way to declare uncompressed
                     h = b''
                     compression_mode = COMP_MODE_PLAIN
                     deltainfo = drop_u_compression(deltainfo)
                 return compression_mode, deltainfo