##// END OF EJS Templates
delta-find: drop the temporary indent...
marmoute -
r52230:ac8b798e default
parent child Browse files
Show More
@@ -1,1713 +1,1712 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
45 class _testrevlog:
45 class _testrevlog:
46 """minimalist fake revlog to use in doctests"""
46 """minimalist fake revlog to use in doctests"""
47
47
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 """data is an list of revision payload boundaries"""
49 """data is an list of revision payload boundaries"""
50 from .. import revlog
50 from .. import revlog
51
51
52 self._data = data
52 self._data = data
53 self.data_config = revlog.DataConfig()
53 self.data_config = revlog.DataConfig()
54 self.data_config.sr_density_threshold = density
54 self.data_config.sr_density_threshold = density
55 self.data_config.sr_min_gap_size = mingap
55 self.data_config.sr_min_gap_size = mingap
56 self.delta_config = revlog.DeltaConfig()
56 self.delta_config = revlog.DeltaConfig()
57 self.feature_config = revlog.FeatureConfig()
57 self.feature_config = revlog.FeatureConfig()
58 self._snapshot = set(snapshot)
58 self._snapshot = set(snapshot)
59 self.index = None
59 self.index = None
60
60
61 def start(self, rev):
61 def start(self, rev):
62 if rev == nullrev:
62 if rev == nullrev:
63 return 0
63 return 0
64 if rev == 0:
64 if rev == 0:
65 return 0
65 return 0
66 return self._data[rev - 1]
66 return self._data[rev - 1]
67
67
68 def end(self, rev):
68 def end(self, rev):
69 if rev == nullrev:
69 if rev == nullrev:
70 return 0
70 return 0
71 return self._data[rev]
71 return self._data[rev]
72
72
73 def length(self, rev):
73 def length(self, rev):
74 return self.end(rev) - self.start(rev)
74 return self.end(rev) - self.start(rev)
75
75
76 def __len__(self):
76 def __len__(self):
77 return len(self._data)
77 return len(self._data)
78
78
79 def issnapshot(self, rev):
79 def issnapshot(self, rev):
80 if rev == nullrev:
80 if rev == nullrev:
81 return True
81 return True
82 return rev in self._snapshot
82 return rev in self._snapshot
83
83
84
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    min_gap = revlog.data_config.sr_min_gap_size
    if targetsize is not None:
        targetsize = max(targetsize, min_gap)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native implementation available, fall back to the Python one

        def densityslicing(rs, density, gap):
            return _slicechunktodensity(revlog, rs, density, gap)

    density_threshold = revlog.data_config.sr_density_threshold
    for chunk in densityslicing(revs, density_threshold, min_gap):
        # enforce the size limit on each density-selected chunk
        yield from _slicechunktosize(revlog, chunk, targetsize)
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole span already fits within the limit (or there is no limit):
    # nothing to slice
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    # (grow the current chunk while the next rev is a snapshot that still
    # fits; flush the chunk as soon as either condition fails)
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            # restart a fresh chunk at the current revision
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate chunk until it fits (or is a single rev)
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # flush whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) is always dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data actually read;
        # recompute the density on the remaining read size
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    # the span runs from the start of the first rev to the end of the last
    return revlog.end(revs[-1]) - revlog.start(revs[0])
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    )
    if delta[:hlen] == replace_header:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        fulltext = mdiff.patch(revlog.revision(baserev), delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            msg = _(b'node %s is not censored') % expectednode
            raise error.StorageError(msg)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
557 @attr.s(slots=True, frozen=True)
557 @attr.s(slots=True, frozen=True)
558 class _deltainfo:
558 class _deltainfo:
559 distance = attr.ib()
559 distance = attr.ib()
560 deltalen = attr.ib()
560 deltalen = attr.ib()
561 data = attr.ib()
561 data = attr.ib()
562 base = attr.ib()
562 base = attr.ib()
563 chainbase = attr.ib()
563 chainbase = attr.ib()
564 chainlen = attr.ib()
564 chainlen = attr.ib()
565 compresseddeltalen = attr.ib()
565 compresseddeltalen = attr.ib()
566 snapshotdepth = attr.ib()
566 snapshotdepth = attr.ib()
567
567
568
568
569 def drop_u_compression(delta):
569 def drop_u_compression(delta):
570 """turn into a "u" (no-compression) into no-compression without header
570 """turn into a "u" (no-compression) into no-compression without header
571
571
572 This is useful for revlog format that has better compression method.
572 This is useful for revlog format that has better compression method.
573 """
573 """
574 assert delta.data[0] == b'u', delta.data[0]
574 assert delta.data[0] == b'u', delta.data[0]
575 return _deltainfo(
575 return _deltainfo(
576 delta.distance,
576 delta.distance,
577 delta.deltalen - 1,
577 delta.deltalen - 1,
578 (b'', delta.data[1]),
578 (b'', delta.data[1]),
579 delta.base,
579 delta.base,
580 delta.chainbase,
580 delta.chainbase,
581 delta.chainlen,
581 delta.chainlen,
582 delta.compresseddeltalen,
582 delta.compresseddeltalen,
583 delta.snapshotdepth,
583 delta.snapshotdepth,
584 )
584 )
585
585
586
586
587 # If a revision's full text is that much bigger than a base candidate full
587 # If a revision's full text is that much bigger than a base candidate full
588 # text's, it is very unlikely that it will produce a valid delta. We no longer
588 # text's, it is very unlikely that it will produce a valid delta. We no longer
589 # consider these candidates.
589 # consider these candidates.
590 LIMIT_BASE2TEXT = 500
590 LIMIT_BASE2TEXT = 500
591
591
592
592
593 class _DeltaSearch:
593 class _DeltaSearch:
594 """perform the search of a good delta for a single revlog revision
594 """perform the search of a good delta for a single revlog revision
595
595
596 note: some of the deltacomputer.finddeltainfo logic should probably move
596 note: some of the deltacomputer.finddeltainfo logic should probably move
597 here.
597 here.
598 """
598 """
599
599
600 def __init__(
600 def __init__(
601 self,
601 self,
602 revlog,
602 revlog,
603 revinfo,
603 revinfo,
604 p1,
604 p1,
605 p2,
605 p2,
606 cachedelta,
606 cachedelta,
607 excluded_bases=None,
607 excluded_bases=None,
608 target_rev=None,
608 target_rev=None,
609 snapshot_cache=None,
609 snapshot_cache=None,
610 ):
610 ):
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
612 # so we should never end up asking such question. Adding the assert as
612 # so we should never end up asking such question. Adding the assert as
613 # a safe-guard to detect anything that would be fishy in this regard.
613 # a safe-guard to detect anything that would be fishy in this regard.
614 assert (
614 assert (
615 cachedelta is None
615 cachedelta is None
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
617 or not revlog.delta_config.general_delta
617 or not revlog.delta_config.general_delta
618 )
618 )
619 self.revlog = revlog
619 self.revlog = revlog
620 self.revinfo = revinfo
620 self.revinfo = revinfo
621 self.textlen = revinfo.textlen
621 self.textlen = revinfo.textlen
622 self.p1 = p1
622 self.p1 = p1
623 self.p2 = p2
623 self.p2 = p2
624 self.cachedelta = cachedelta
624 self.cachedelta = cachedelta
625 self.excluded_bases = excluded_bases
625 self.excluded_bases = excluded_bases
626 if target_rev is None:
626 if target_rev is None:
627 self.target_rev = len(self.revlog)
627 self.target_rev = len(self.revlog)
628 self.target_rev = target_rev
628 self.target_rev = target_rev
629 if snapshot_cache is None:
629 if snapshot_cache is None:
630 # map: base-rev: [snapshot-revs]
630 # map: base-rev: [snapshot-revs]
631 snapshot_cache = SnapshotCache()
631 snapshot_cache = SnapshotCache()
632 self.snapshot_cache = snapshot_cache
632 self.snapshot_cache = snapshot_cache
633
633
634 self.tested = {nullrev}
634 self.tested = {nullrev}
635
635
636 self._candidates_iterator = self._candidate_groups()
636 self._candidates_iterator = self._candidate_groups()
637 self._last_good = None
637 self._last_good = None
638 self.current_group = self._candidates_iterator.send(self._last_good)
638 self.current_group = self._candidates_iterator.send(self._last_good)
639
639
640 @property
640 @property
641 def done(self):
641 def done(self):
642 """True when all possible candidate have been tested"""
642 """True when all possible candidate have been tested"""
643 return self.current_group is None
643 return self.current_group is None
644
644
645 def next_group(self, good_delta=None):
645 def next_group(self, good_delta=None):
646 """move to the next group to test
646 """move to the next group to test
647
647
648 The group of revision to test will be available in
648 The group of revision to test will be available in
649 `self.current_group`. If the previous group had any good delta, the
649 `self.current_group`. If the previous group had any good delta, the
650 best one can be passed as the `good_delta` parameter to help selecting
650 best one can be passed as the `good_delta` parameter to help selecting
651 the next group.
651 the next group.
652
652
653 If not revision remains to be, `self.done` will be True and
653 If not revision remains to be, `self.done` will be True and
654 `self.current_group` will be None.
654 `self.current_group` will be None.
655 """
655 """
656 if good_delta is not None:
656 if good_delta is not None:
657 self._last_good = good_delta.base
657 self._last_good = good_delta.base
658 self.current_group = self._candidates_iterator.send(self._last_good)
658 self.current_group = self._candidates_iterator.send(self._last_good)
659
659
660 def _candidate_groups(self):
660 def _candidate_groups(self):
661 """Provides group of revision to be tested as delta base
661 """Provides group of revision to be tested as delta base
662
662
663 This top level function focus on emitting groups with unique and
663 This top level function focus on emitting groups with unique and
664 worthwhile content. See _raw_candidate_groups for details about the
664 worthwhile content. See _raw_candidate_groups for details about the
665 group order.
665 group order.
666 """
666 """
667 # should we try to build a delta?
667 # should we try to build a delta?
668 if not (len(self.revlog) and self.revlog._storedeltachains):
668 if not (len(self.revlog) and self.revlog._storedeltachains):
669 yield None
669 yield None
670 return
670 return
671
671
672 if not self.revlog.delta_config.general_delta:
672 if not self.revlog.delta_config.general_delta:
673 # before general delta, there is only one possible delta base
673 # before general delta, there is only one possible delta base
674 yield (self.target_rev - 1,)
674 yield (self.target_rev - 1,)
675 yield None
675 yield None
676 return
676 return
677
677
678 good = None
678 good = None
679
679
680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
681
681
682 tested = self.tested # prefetch for speed and code compactness
682 tested = self.tested # prefetch for speed and code compactness
683 candidates = self._refined_groups()
683 candidates = self._refined_groups()
684 while True:
684 while True:
685 temptative = candidates.send(good)
685 temptative = candidates.send(good)
686 if temptative is None:
686 if temptative is None:
687 break
687 break
688 group = self._pre_filter_candidate_revs(temptative)
688 group = self._pre_filter_candidate_revs(temptative)
689 if group:
689 if group:
690 # When the size of the candidate group is big, it can result in
690 # When the size of the candidate group is big, it can result in
691 # a quite significant performance impact. To reduce this, we
691 # a quite significant performance impact. To reduce this, we
692 # can send them in smaller batches until the new batch does not
692 # can send them in smaller batches until the new batch does not
693 # provide any improvements.
693 # provide any improvements.
694 #
694 #
695 # This might reduce the overall efficiency of the compression
695 # This might reduce the overall efficiency of the compression
696 # in some corner cases, but that should also prevent very
696 # in some corner cases, but that should also prevent very
697 # pathological cases from being an issue. (eg. 20 000
697 # pathological cases from being an issue. (eg. 20 000
698 # candidates).
698 # candidates).
699 #
699 #
700 # XXX note that the ordering of the group becomes important as
700 # XXX note that the ordering of the group becomes important as
701 # it now impacts the final result. The current order is
701 # it now impacts the final result. The current order is
702 # unprocessed and can be improved.
702 # unprocessed and can be improved.
703 if group_chunk_size == 0:
703 if group_chunk_size == 0:
704 tested.update(group)
704 tested.update(group)
705 good = yield tuple(group)
705 good = yield tuple(group)
706 else:
706 else:
707 prev_good = good
707 prev_good = good
708 for start in range(0, len(group), group_chunk_size):
708 for start in range(0, len(group), group_chunk_size):
709 sub_group = group[start : start + group_chunk_size]
709 sub_group = group[start : start + group_chunk_size]
710 tested.update(sub_group)
710 tested.update(sub_group)
711 good = yield tuple(sub_group)
711 good = yield tuple(sub_group)
712 if prev_good == good:
712 if prev_good == good:
713 break
713 break
714
714
715 yield None
715 yield None
716
716
717 def _pre_filter_candidate_revs(self, temptative):
717 def _pre_filter_candidate_revs(self, temptative):
718 """filter possible candidate before computing a delta
718 """filter possible candidate before computing a delta
719
719
720 This function use various criteria to pre-filter candidate delta base
720 This function use various criteria to pre-filter candidate delta base
721 before we compute a delta and evaluate its quality.
721 before we compute a delta and evaluate its quality.
722
722
723 Such pre-filter limit the number of computed delta, an expensive operation.
723 Such pre-filter limit the number of computed delta, an expensive operation.
724
724
725 return the updated list of revision to test
725 return the updated list of revision to test
726 """
726 """
727 deltalength = self.revlog.length
727 deltalength = self.revlog.length
728 deltaparent = self.revlog.deltaparent
728 deltaparent = self.revlog.deltaparent
729
729
730 tested = self.tested
730 tested = self.tested
731 group = []
731 group = []
732 for rev in temptative:
732 for rev in temptative:
733 # skip over empty delta (no need to include them in a chain)
733 # skip over empty delta (no need to include them in a chain)
734 while not (rev == nullrev or rev in tested or deltalength(rev)):
734 while not (rev == nullrev or rev in tested or deltalength(rev)):
735 tested.add(rev)
735 tested.add(rev)
736 rev = deltaparent(rev)
736 rev = deltaparent(rev)
737 if self._pre_filter_rev(rev):
737 if self._pre_filter_rev(rev):
738 group.append(rev)
738 group.append(rev)
739 else:
739 else:
740 self.tested.add(rev)
740 self.tested.add(rev)
741 return group
741 return group
742
742
743 def _pre_filter_rev(self, rev):
743 def _pre_filter_rev(self, rev):
744 """return True if it seems okay to test a rev, False otherwise"""
744 """return True if it seems okay to test a rev, False otherwise"""
745 if True:
745 # no need to try a delta against nullrev, this will be done as
746 # no need to try a delta against nullrev, this will be done as
746 # a last resort.
747 # a last resort.
747 if rev == nullrev:
748 if rev == nullrev:
748 return False
749 return False
749 # filter out revision we tested already
750 # filter out revision we tested already
750 if rev in self.tested:
751 if rev in self.tested:
751 return False
752 return False
753
752
754 # an higher authority deamed the base unworthy (e.g. censored)
753 # an higher authority deamed the base unworthy (e.g. censored)
755 if self.excluded_bases is not None and rev in self.excluded_bases:
754 if self.excluded_bases is not None and rev in self.excluded_bases:
756 return False
755 return False
757 # We are in some recomputation cases and that rev is too high
756 # We are in some recomputation cases and that rev is too high
758 # in the revlog
757 # in the revlog
759 if self.target_rev is not None and rev >= self.target_rev:
758 if self.target_rev is not None and rev >= self.target_rev:
760 return False
759 return False
761
760
762 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
761 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
763 # filter out delta base that will never produce good delta
762 # filter out delta base that will never produce good delta
764 #
763 #
765 # if the delta of that base is already bigger than the limit
764 # if the delta of that base is already bigger than the limit
766 # for the delta chain size, doing a delta is hopeless.
765 # for the delta chain size, doing a delta is hopeless.
767 if deltas_limit < self.revlog.length(rev):
766 if deltas_limit < self.revlog.length(rev):
768 return False
767 return False
769
768
770 sparse = self.revlog.delta_config.sparse_revlog
769 sparse = self.revlog.delta_config.sparse_revlog
771 # if the revision we test again is too small, the resulting delta
770 # if the revision we test again is too small, the resulting delta
772 # will be large anyway as that amount of data to be added is big
771 # will be large anyway as that amount of data to be added is big
773 if sparse and self.revlog.rawsize(rev) < (
772 if sparse and self.revlog.rawsize(rev) < (
774 self.textlen // LIMIT_BASE2TEXT
773 self.textlen // LIMIT_BASE2TEXT
775 ):
774 ):
776 return False
775 return False
777
776
778 # no delta for rawtext-changing revs (see "candelta" for why)
777 # no delta for rawtext-changing revs (see "candelta" for why)
779 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
778 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
780 return False
779 return False
781
780
782 # If we reach here, we are about to build and test a delta.
781 # If we reach here, we are about to build and test a delta.
783 # The delta building process will compute the chaininfo in all
782 # The delta building process will compute the chaininfo in all
784 # case, since that computation is cached, it is fine to access
783 # case, since that computation is cached, it is fine to access
785 # it here too.
784 # it here too.
786 chainlen, chainsize = self.revlog._chaininfo(rev)
785 chainlen, chainsize = self.revlog._chaininfo(rev)
787 # if chain will be too long, skip base
786 # if chain will be too long, skip base
788 if (
787 if (
789 self.revlog.delta_config.max_chain_len
788 self.revlog.delta_config.max_chain_len
790 and chainlen >= self.revlog.delta_config.max_chain_len
789 and chainlen >= self.revlog.delta_config.max_chain_len
791 ):
790 ):
792 return False
791 return False
793 # if chain already have too much data, skip base
792 # if chain already have too much data, skip base
794 if deltas_limit < chainsize:
793 if deltas_limit < chainsize:
795 return False
794 return False
796 if sparse and self.revlog.delta_config.upper_bound_comp is not None:
795 if sparse and self.revlog.delta_config.upper_bound_comp is not None:
797 maxcomp = self.revlog.delta_config.upper_bound_comp
796 maxcomp = self.revlog.delta_config.upper_bound_comp
798 basenotsnap = (self.p1, self.p2, nullrev)
797 basenotsnap = (self.p1, self.p2, nullrev)
799 if rev not in basenotsnap and self.revlog.issnapshot(rev):
798 if rev not in basenotsnap and self.revlog.issnapshot(rev):
800 snapshotdepth = self.revlog.snapshotdepth(rev)
799 snapshotdepth = self.revlog.snapshotdepth(rev)
801 # If text is significantly larger than the base, we can
800 # If text is significantly larger than the base, we can
802 # expect the resulting delta to be proportional to the size
801 # expect the resulting delta to be proportional to the size
803 # difference
802 # difference
804 revsize = self.revlog.rawsize(rev)
803 revsize = self.revlog.rawsize(rev)
805 rawsizedistance = max(self.textlen - revsize, 0)
804 rawsizedistance = max(self.textlen - revsize, 0)
806 # use an estimate of the compression upper bound.
805 # use an estimate of the compression upper bound.
807 lowestrealisticdeltalen = rawsizedistance // maxcomp
806 lowestrealisticdeltalen = rawsizedistance // maxcomp
808
807
809 # check the absolute constraint on the delta size
808 # check the absolute constraint on the delta size
810 snapshotlimit = self.textlen >> snapshotdepth
809 snapshotlimit = self.textlen >> snapshotdepth
811 if snapshotlimit < lowestrealisticdeltalen:
810 if snapshotlimit < lowestrealisticdeltalen:
812 # delta lower bound is larger than accepted upper
811 # delta lower bound is larger than accepted upper
813 # bound
812 # bound
814 return False
813 return False
815
814
816 # check the relative constraint on the delta size
815 # check the relative constraint on the delta size
817 revlength = self.revlog.length(rev)
816 revlength = self.revlog.length(rev)
818 if revlength < lowestrealisticdeltalen:
817 if revlength < lowestrealisticdeltalen:
819 # delta probable lower bound is larger than target
818 # delta probable lower bound is larger than target
820 # base
819 # base
821 return False
820 return False
822 return True
821 return True
823
822
824 def _refined_groups(self):
823 def _refined_groups(self):
825 good = None
824 good = None
826 # First we try to reuse a the delta contained in the bundle. (or from
825 # First we try to reuse a the delta contained in the bundle. (or from
827 # the source revlog)
826 # the source revlog)
828 #
827 #
829 # This logic only applies to general delta repositories and can be
828 # This logic only applies to general delta repositories and can be
830 # disabled through configuration. Disabling reuse source delta is
829 # disabled through configuration. Disabling reuse source delta is
831 # useful when we want to make sure we recomputed "optimal" deltas.
830 # useful when we want to make sure we recomputed "optimal" deltas.
832 debug_info = None
831 debug_info = None
833 if (
832 if (
834 self.cachedelta is not None
833 self.cachedelta is not None
835 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
834 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
836 ):
835 ):
837 # Assume what we received from the server is a good choice
836 # Assume what we received from the server is a good choice
838 # build delta will reuse the cache
837 # build delta will reuse the cache
839 if debug_info is not None:
838 if debug_info is not None:
840 debug_info['cached-delta.tested'] += 1
839 debug_info['cached-delta.tested'] += 1
841 good = yield (self.cachedelta[0],)
840 good = yield (self.cachedelta[0],)
842 if good is not None:
841 if good is not None:
843 if debug_info is not None:
842 if debug_info is not None:
844 debug_info['cached-delta.accepted'] += 1
843 debug_info['cached-delta.accepted'] += 1
845 yield None
844 yield None
846 return
845 return
847 groups = self._raw_groups()
846 groups = self._raw_groups()
848 for candidates in groups:
847 for candidates in groups:
849 good = yield candidates
848 good = yield candidates
850 if good is not None:
849 if good is not None:
851 break
850 break
852
851
853 # If sparse revlog is enabled, we can try to refine the available
852 # If sparse revlog is enabled, we can try to refine the available
854 # deltas
853 # deltas
855 if not self.revlog.delta_config.sparse_revlog:
854 if not self.revlog.delta_config.sparse_revlog:
856 yield None
855 yield None
857 return
856 return
858
857
859 # if we have a refinable value, try to refine it
858 # if we have a refinable value, try to refine it
860 if (
859 if (
861 good is not None
860 good is not None
862 and good not in (self.p1, self.p2)
861 and good not in (self.p1, self.p2)
863 and self.revlog.issnapshot(good)
862 and self.revlog.issnapshot(good)
864 ):
863 ):
865 # refine snapshot down
864 # refine snapshot down
866 previous = None
865 previous = None
867 while previous != good:
866 while previous != good:
868 previous = good
867 previous = good
869 base = self.revlog.deltaparent(good)
868 base = self.revlog.deltaparent(good)
870 if base == nullrev:
869 if base == nullrev:
871 break
870 break
872 good = yield (base,)
871 good = yield (base,)
873 # refine snapshot up
872 # refine snapshot up
874 if not self.snapshot_cache.snapshots:
873 if not self.snapshot_cache.snapshots:
875 self.snapshot_cache.update(self.revlog, good + 1)
874 self.snapshot_cache.update(self.revlog, good + 1)
876 previous = None
875 previous = None
877 while good != previous:
876 while good != previous:
878 previous = good
877 previous = good
879 children = tuple(
878 children = tuple(
880 sorted(c for c in self.snapshot_cache.snapshots[good])
879 sorted(c for c in self.snapshot_cache.snapshots[good])
881 )
880 )
882 good = yield children
881 good = yield children
883
882
884 if debug_info is not None:
883 if debug_info is not None:
885 if good is None:
884 if good is None:
886 debug_info['no-solution'] += 1
885 debug_info['no-solution'] += 1
887
886
888 yield None
887 yield None
889
888
890 def _raw_groups(self):
889 def _raw_groups(self):
891 """Provides group of revision to be tested as delta base
890 """Provides group of revision to be tested as delta base
892
891
893 This lower level function focus on emitting delta theorically
892 This lower level function focus on emitting delta theorically
894 interresting without looking it any practical details.
893 interresting without looking it any practical details.
895
894
896 The group order aims at providing fast or small candidates first.
895 The group order aims at providing fast or small candidates first.
897 """
896 """
898 # Why search for delta base if we cannot use a delta base ?
897 # Why search for delta base if we cannot use a delta base ?
899 assert self.revlog.delta_config.general_delta
898 assert self.revlog.delta_config.general_delta
900 # also see issue6056
899 # also see issue6056
901 sparse = self.revlog.delta_config.sparse_revlog
900 sparse = self.revlog.delta_config.sparse_revlog
902 prev = self.target_rev - 1
901 prev = self.target_rev - 1
903 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
902 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
904
903
905 # exclude already lazy tested base if any
904 # exclude already lazy tested base if any
906 parents = [p for p in (self.p1, self.p2) if p != nullrev]
905 parents = [p for p in (self.p1, self.p2) if p != nullrev]
907
906
908 if (
907 if (
909 not self.revlog.delta_config.delta_both_parents
908 not self.revlog.delta_config.delta_both_parents
910 and len(parents) == 2
909 and len(parents) == 2
911 ):
910 ):
912 parents.sort()
911 parents.sort()
913 # To minimize the chance of having to build a fulltext,
912 # To minimize the chance of having to build a fulltext,
914 # pick first whichever parent is closest to us (max rev)
913 # pick first whichever parent is closest to us (max rev)
915 yield (parents[1],)
914 yield (parents[1],)
916 # then the other one (min rev) if the first did not fit
915 # then the other one (min rev) if the first did not fit
917 yield (parents[0],)
916 yield (parents[0],)
918 elif len(parents) > 0:
917 elif len(parents) > 0:
919 # Test all parents (1 or 2), and keep the best candidate
918 # Test all parents (1 or 2), and keep the best candidate
920 yield parents
919 yield parents
921
920
922 if sparse and parents:
921 if sparse and parents:
923 # See if we can use an existing snapshot in the parent chains to
922 # See if we can use an existing snapshot in the parent chains to
924 # use as a base for a new intermediate-snapshot
923 # use as a base for a new intermediate-snapshot
925 #
924 #
926 # search for snapshot in parents delta chain map: snapshot-level:
925 # search for snapshot in parents delta chain map: snapshot-level:
927 # snapshot-rev
926 # snapshot-rev
928 parents_snaps = collections.defaultdict(set)
927 parents_snaps = collections.defaultdict(set)
929 candidate_chains = [deltachain(p) for p in parents]
928 candidate_chains = [deltachain(p) for p in parents]
930 for chain in candidate_chains:
929 for chain in candidate_chains:
931 for idx, s in enumerate(chain):
930 for idx, s in enumerate(chain):
932 if not self.revlog.issnapshot(s):
931 if not self.revlog.issnapshot(s):
933 break
932 break
934 parents_snaps[idx].add(s)
933 parents_snaps[idx].add(s)
935 snapfloor = min(parents_snaps[0]) + 1
934 snapfloor = min(parents_snaps[0]) + 1
936 self.snapshot_cache.update(self.revlog, snapfloor)
935 self.snapshot_cache.update(self.revlog, snapfloor)
937 # search for the highest "unrelated" revision
936 # search for the highest "unrelated" revision
938 #
937 #
939 # Adding snapshots used by "unrelated" revision increase the odd we
938 # Adding snapshots used by "unrelated" revision increase the odd we
940 # reuse an independant, yet better snapshot chain.
939 # reuse an independant, yet better snapshot chain.
941 #
940 #
942 # XXX instead of building a set of revisions, we could lazily
941 # XXX instead of building a set of revisions, we could lazily
943 # enumerate over the chains. That would be more efficient, however
942 # enumerate over the chains. That would be more efficient, however
944 # we stick to simple code for now.
943 # we stick to simple code for now.
945 all_revs = set()
944 all_revs = set()
946 for chain in candidate_chains:
945 for chain in candidate_chains:
947 all_revs.update(chain)
946 all_revs.update(chain)
948 other = None
947 other = None
949 for r in self.revlog.revs(prev, snapfloor):
948 for r in self.revlog.revs(prev, snapfloor):
950 if r not in all_revs:
949 if r not in all_revs:
951 other = r
950 other = r
952 break
951 break
953 if other is not None:
952 if other is not None:
954 # To avoid unfair competition, we won't use unrelated
953 # To avoid unfair competition, we won't use unrelated
955 # intermediate snapshot that are deeper than the ones from the
954 # intermediate snapshot that are deeper than the ones from the
956 # parent delta chain.
955 # parent delta chain.
957 max_depth = max(parents_snaps.keys())
956 max_depth = max(parents_snaps.keys())
958 chain = deltachain(other)
957 chain = deltachain(other)
959 for depth, s in enumerate(chain):
958 for depth, s in enumerate(chain):
960 if s < snapfloor:
959 if s < snapfloor:
961 continue
960 continue
962 if max_depth < depth:
961 if max_depth < depth:
963 break
962 break
964 if not self.revlog.issnapshot(s):
963 if not self.revlog.issnapshot(s):
965 break
964 break
966 parents_snaps[depth].add(s)
965 parents_snaps[depth].add(s)
967 # Test them as possible intermediate snapshot base We test them
966 # Test them as possible intermediate snapshot base We test them
968 # from highest to lowest level. High level one are more likely to
967 # from highest to lowest level. High level one are more likely to
969 # result in small delta
968 # result in small delta
970 floor = None
969 floor = None
971 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
970 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
972 siblings = set()
971 siblings = set()
973 for s in snaps:
972 for s in snaps:
974 siblings.update(self.snapshot_cache.snapshots[s])
973 siblings.update(self.snapshot_cache.snapshots[s])
975 # Before considering making a new intermediate snapshot, we
974 # Before considering making a new intermediate snapshot, we
976 # check if an existing snapshot, children of base we consider,
975 # check if an existing snapshot, children of base we consider,
977 # would be suitable.
976 # would be suitable.
978 #
977 #
979 # It give a change to reuse a delta chain "unrelated" to the
978 # It give a change to reuse a delta chain "unrelated" to the
980 # current revision instead of starting our own. Without such
979 # current revision instead of starting our own. Without such
981 # re-use, topological branches would keep reopening new chains.
980 # re-use, topological branches would keep reopening new chains.
982 # Creating more and more snapshot as the repository grow.
981 # Creating more and more snapshot as the repository grow.
983
982
984 if floor is not None:
983 if floor is not None:
985 # We only do this for siblings created after the one in our
984 # We only do this for siblings created after the one in our
986 # parent's delta chain. Those created before has less
985 # parent's delta chain. Those created before has less
987 # chances to be valid base since our ancestors had to
986 # chances to be valid base since our ancestors had to
988 # create a new snapshot.
987 # create a new snapshot.
989 siblings = [r for r in siblings if floor < r]
988 siblings = [r for r in siblings if floor < r]
990 yield tuple(sorted(siblings))
989 yield tuple(sorted(siblings))
991 # then test the base from our parent's delta chain.
990 # then test the base from our parent's delta chain.
992 yield tuple(sorted(snaps))
991 yield tuple(sorted(snaps))
993 floor = min(snaps)
992 floor = min(snaps)
994 # No suitable base found in the parent chain, search if any full
993 # No suitable base found in the parent chain, search if any full
995 # snapshots emitted since parent's base would be a suitable base
994 # snapshots emitted since parent's base would be a suitable base
996 # for an intermediate snapshot.
995 # for an intermediate snapshot.
997 #
996 #
998 # It give a chance to reuse a delta chain unrelated to the current
997 # It give a chance to reuse a delta chain unrelated to the current
999 # revisions instead of starting our own. Without such re-use,
998 # revisions instead of starting our own. Without such re-use,
1000 # topological branches would keep reopening new full chains.
999 # topological branches would keep reopening new full chains.
1001 # Creating more and more snapshot as the repository grow.
1000 # Creating more and more snapshot as the repository grow.
1002 full = [
1001 full = [
1003 r
1002 r
1004 for r in self.snapshot_cache.snapshots[nullrev]
1003 for r in self.snapshot_cache.snapshots[nullrev]
1005 if snapfloor <= r
1004 if snapfloor <= r
1006 ]
1005 ]
1007 yield tuple(sorted(full))
1006 yield tuple(sorted(full))
1008
1007
1009 if not sparse:
1008 if not sparse:
1010 # other approach failed try against prev to hopefully save us a
1009 # other approach failed try against prev to hopefully save us a
1011 # fulltext.
1010 # fulltext.
1012 yield (prev,)
1011 yield (prev,)
1013
1012
1014 def is_good_delta_info(self, deltainfo):
1013 def is_good_delta_info(self, deltainfo):
1015 """Returns True if the given delta is good. Good means that it is
1014 """Returns True if the given delta is good. Good means that it is
1016 within the disk span, disk size, and chain length bounds that we know
1015 within the disk span, disk size, and chain length bounds that we know
1017 to be performant."""
1016 to be performant."""
1018 if deltainfo is None:
1017 if deltainfo is None:
1019 return False
1018 return False
1020
1019
1021 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
1020 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
1022 # so we should never end up asking such question. Adding the assert as
1021 # so we should never end up asking such question. Adding the assert as
1023 # a safe-guard to detect anything that would be fishy in this regard.
1022 # a safe-guard to detect anything that would be fishy in this regard.
1024 assert (
1023 assert (
1025 self.revinfo.cachedelta is None
1024 self.revinfo.cachedelta is None
1026 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
1025 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
1027 or not self.revlog.delta_config.general_delta
1026 or not self.revlog.delta_config.general_delta
1028 )
1027 )
1029
1028
1030 # - 'deltainfo.distance' is the distance from the base revision --
1029 # - 'deltainfo.distance' is the distance from the base revision --
1031 # bounding it limits the amount of I/O we need to do.
1030 # bounding it limits the amount of I/O we need to do.
1032 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
1031 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
1033 # deltas we need to apply -- bounding it limits the amount of CPU
1032 # deltas we need to apply -- bounding it limits the amount of CPU
1034 # we consume.
1033 # we consume.
1035
1034
1036 textlen = self.revinfo.textlen
1035 textlen = self.revinfo.textlen
1037 defaultmax = textlen * 4
1036 defaultmax = textlen * 4
1038 maxdist = self.revlog.delta_config.max_deltachain_span
1037 maxdist = self.revlog.delta_config.max_deltachain_span
1039 if not maxdist:
1038 if not maxdist:
1040 maxdist = deltainfo.distance # ensure the conditional pass
1039 maxdist = deltainfo.distance # ensure the conditional pass
1041 maxdist = max(maxdist, defaultmax)
1040 maxdist = max(maxdist, defaultmax)
1042
1041
1043 # Bad delta from read span:
1042 # Bad delta from read span:
1044 #
1043 #
1045 # If the span of data read is larger than the maximum allowed.
1044 # If the span of data read is larger than the maximum allowed.
1046 #
1045 #
1047 # In the sparse-revlog case, we rely on the associated "sparse
1046 # In the sparse-revlog case, we rely on the associated "sparse
1048 # reading" to avoid issue related to the span of data. In theory, it
1047 # reading" to avoid issue related to the span of data. In theory, it
1049 # would be possible to build pathological revlog where delta pattern
1048 # would be possible to build pathological revlog where delta pattern
1050 # would lead to too many reads. However, they do not happen in
1049 # would lead to too many reads. However, they do not happen in
1051 # practice at all. So we skip the span check entirely.
1050 # practice at all. So we skip the span check entirely.
1052 if (
1051 if (
1053 not self.revlog.delta_config.sparse_revlog
1052 not self.revlog.delta_config.sparse_revlog
1054 and maxdist < deltainfo.distance
1053 and maxdist < deltainfo.distance
1055 ):
1054 ):
1056 return False
1055 return False
1057
1056
1058 # Bad delta from new delta size:
1057 # Bad delta from new delta size:
1059 #
1058 #
1060 # If the delta size is larger than the target text, storing the delta
1059 # If the delta size is larger than the target text, storing the delta
1061 # will be inefficient.
1060 # will be inefficient.
1062 if textlen < deltainfo.deltalen:
1061 if textlen < deltainfo.deltalen:
1063 return False
1062 return False
1064
1063
1065 # Bad delta from cumulated payload size:
1064 # Bad delta from cumulated payload size:
1066 #
1065 #
1067 # If the sum of delta get larger than K * target text length.
1066 # If the sum of delta get larger than K * target text length.
1068 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1067 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1069 return False
1068 return False
1070
1069
1071 # Bad delta from chain length:
1070 # Bad delta from chain length:
1072 #
1071 #
1073 # If the number of delta in the chain gets too high.
1072 # If the number of delta in the chain gets too high.
1074 if (
1073 if (
1075 self.revlog.delta_config.max_chain_len
1074 self.revlog.delta_config.max_chain_len
1076 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1075 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1077 ):
1076 ):
1078 return False
1077 return False
1079
1078
1080 # bad delta from intermediate snapshot size limit
1079 # bad delta from intermediate snapshot size limit
1081 #
1080 #
1082 # If an intermediate snapshot size is higher than the limit. The
1081 # If an intermediate snapshot size is higher than the limit. The
1083 # limit exist to prevent endless chain of intermediate delta to be
1082 # limit exist to prevent endless chain of intermediate delta to be
1084 # created.
1083 # created.
1085 if (
1084 if (
1086 deltainfo.snapshotdepth is not None
1085 deltainfo.snapshotdepth is not None
1087 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1086 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1088 ):
1087 ):
1089 return False
1088 return False
1090
1089
1091 # bad delta if new intermediate snapshot is larger than the previous
1090 # bad delta if new intermediate snapshot is larger than the previous
1092 # snapshot
1091 # snapshot
1093 if (
1092 if (
1094 deltainfo.snapshotdepth
1093 deltainfo.snapshotdepth
1095 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1094 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1096 ):
1095 ):
1097 return False
1096 return False
1098
1097
1099 return True
1098 return True
1100
1099
1101
1100
1102 class SnapshotCache:
1101 class SnapshotCache:
1103 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1102 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1104
1103
1105 def __init__(self):
1104 def __init__(self):
1106 self.snapshots = collections.defaultdict(set)
1105 self.snapshots = collections.defaultdict(set)
1107 self._start_rev = None
1106 self._start_rev = None
1108 self._end_rev = None
1107 self._end_rev = None
1109
1108
1110 def update(self, revlog, start_rev=0):
1109 def update(self, revlog, start_rev=0):
1111 """find snapshots from start_rev to tip"""
1110 """find snapshots from start_rev to tip"""
1112 nb_revs = len(revlog)
1111 nb_revs = len(revlog)
1113 end_rev = nb_revs - 1
1112 end_rev = nb_revs - 1
1114 if start_rev > end_rev:
1113 if start_rev > end_rev:
1115 return # range is empty
1114 return # range is empty
1116
1115
1117 if self._start_rev is None:
1116 if self._start_rev is None:
1118 assert self._end_rev is None
1117 assert self._end_rev is None
1119 self._update(revlog, start_rev, end_rev)
1118 self._update(revlog, start_rev, end_rev)
1120 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1119 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1121 if start_rev < self._start_rev:
1120 if start_rev < self._start_rev:
1122 self._update(revlog, start_rev, self._start_rev - 1)
1121 self._update(revlog, start_rev, self._start_rev - 1)
1123 if self._end_rev < end_rev:
1122 if self._end_rev < end_rev:
1124 self._update(revlog, self._end_rev + 1, end_rev)
1123 self._update(revlog, self._end_rev + 1, end_rev)
1125
1124
1126 if self._start_rev is None:
1125 if self._start_rev is None:
1127 assert self._end_rev is None
1126 assert self._end_rev is None
1128 self._end_rev = end_rev
1127 self._end_rev = end_rev
1129 self._start_rev = start_rev
1128 self._start_rev = start_rev
1130 else:
1129 else:
1131 self._start_rev = min(self._start_rev, start_rev)
1130 self._start_rev = min(self._start_rev, start_rev)
1132 self._end_rev = max(self._end_rev, end_rev)
1131 self._end_rev = max(self._end_rev, end_rev)
1133 assert self._start_rev <= self._end_rev, (
1132 assert self._start_rev <= self._end_rev, (
1134 self._start_rev,
1133 self._start_rev,
1135 self._end_rev,
1134 self._end_rev,
1136 )
1135 )
1137
1136
1138 def _update(self, revlog, start_rev, end_rev):
1137 def _update(self, revlog, start_rev, end_rev):
1139 """internal method that actually do update content"""
1138 """internal method that actually do update content"""
1140 assert self._start_rev is None or (
1139 assert self._start_rev is None or (
1141 start_rev < self._start_rev or start_rev > self._end_rev
1140 start_rev < self._start_rev or start_rev > self._end_rev
1142 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1141 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1143 assert self._start_rev is None or (
1142 assert self._start_rev is None or (
1144 end_rev < self._start_rev or end_rev > self._end_rev
1143 end_rev < self._start_rev or end_rev > self._end_rev
1145 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1144 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1146 cache = self.snapshots
1145 cache = self.snapshots
1147 if hasattr(revlog.index, 'findsnapshots'):
1146 if hasattr(revlog.index, 'findsnapshots'):
1148 revlog.index.findsnapshots(cache, start_rev, end_rev)
1147 revlog.index.findsnapshots(cache, start_rev, end_rev)
1149 else:
1148 else:
1150 deltaparent = revlog.deltaparent
1149 deltaparent = revlog.deltaparent
1151 issnapshot = revlog.issnapshot
1150 issnapshot = revlog.issnapshot
1152 for rev in revlog.revs(start_rev, end_rev):
1151 for rev in revlog.revs(start_rev, end_rev):
1153 if issnapshot(rev):
1152 if issnapshot(rev):
1154 cache[deltaparent(rev)].add(rev)
1153 cache[deltaparent(rev)].add(rev)
1155
1154
1156
1155
1157 class deltacomputer:
1156 class deltacomputer:
1158 """object capable of computing delta and finding delta for multiple revision
1157 """object capable of computing delta and finding delta for multiple revision
1159
1158
1160 This object is meant to compute and find multiple delta applied to the same
1159 This object is meant to compute and find multiple delta applied to the same
1161 revlog.
1160 revlog.
1162 """
1161 """
1163
1162
1164 def __init__(
1163 def __init__(
1165 self,
1164 self,
1166 revlog,
1165 revlog,
1167 write_debug=None,
1166 write_debug=None,
1168 debug_search=False,
1167 debug_search=False,
1169 debug_info=None,
1168 debug_info=None,
1170 ):
1169 ):
1171 self.revlog = revlog
1170 self.revlog = revlog
1172 self._write_debug = write_debug
1171 self._write_debug = write_debug
1173 if write_debug is None:
1172 if write_debug is None:
1174 self._debug_search = False
1173 self._debug_search = False
1175 else:
1174 else:
1176 self._debug_search = debug_search
1175 self._debug_search = debug_search
1177 self._debug_info = debug_info
1176 self._debug_info = debug_info
1178 self._snapshot_cache = SnapshotCache()
1177 self._snapshot_cache = SnapshotCache()
1179
1178
1180 @property
1179 @property
1181 def _gather_debug(self):
1180 def _gather_debug(self):
1182 return self._write_debug is not None or self._debug_info is not None
1181 return self._write_debug is not None or self._debug_info is not None
1183
1182
1184 def buildtext(self, revinfo):
1183 def buildtext(self, revinfo):
1185 """Builds a fulltext version of a revision
1184 """Builds a fulltext version of a revision
1186
1185
1187 revinfo: revisioninfo instance that contains all needed info
1186 revinfo: revisioninfo instance that contains all needed info
1188 """
1187 """
1189 btext = revinfo.btext
1188 btext = revinfo.btext
1190 if btext[0] is not None:
1189 if btext[0] is not None:
1191 return btext[0]
1190 return btext[0]
1192
1191
1193 revlog = self.revlog
1192 revlog = self.revlog
1194 cachedelta = revinfo.cachedelta
1193 cachedelta = revinfo.cachedelta
1195 baserev = cachedelta[0]
1194 baserev = cachedelta[0]
1196 delta = cachedelta[1]
1195 delta = cachedelta[1]
1197
1196
1198 fulltext = btext[0] = _textfromdelta(
1197 fulltext = btext[0] = _textfromdelta(
1199 revlog,
1198 revlog,
1200 baserev,
1199 baserev,
1201 delta,
1200 delta,
1202 revinfo.p1,
1201 revinfo.p1,
1203 revinfo.p2,
1202 revinfo.p2,
1204 revinfo.flags,
1203 revinfo.flags,
1205 revinfo.node,
1204 revinfo.node,
1206 )
1205 )
1207 return fulltext
1206 return fulltext
1208
1207
1209 def _builddeltadiff(self, base, revinfo):
1208 def _builddeltadiff(self, base, revinfo):
1210 revlog = self.revlog
1209 revlog = self.revlog
1211 t = self.buildtext(revinfo)
1210 t = self.buildtext(revinfo)
1212 if revlog.iscensored(base):
1211 if revlog.iscensored(base):
1213 # deltas based on a censored revision must replace the
1212 # deltas based on a censored revision must replace the
1214 # full content in one patch, so delta works everywhere
1213 # full content in one patch, so delta works everywhere
1215 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1214 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1216 delta = header + t
1215 delta = header + t
1217 else:
1216 else:
1218 ptext = revlog.rawdata(base)
1217 ptext = revlog.rawdata(base)
1219 delta = mdiff.textdiff(ptext, t)
1218 delta = mdiff.textdiff(ptext, t)
1220
1219
1221 return delta
1220 return delta
1222
1221
1223 def _builddeltainfo(self, revinfo, base, target_rev=None):
1222 def _builddeltainfo(self, revinfo, base, target_rev=None):
1224 # can we use the cached delta?
1223 # can we use the cached delta?
1225 revlog = self.revlog
1224 revlog = self.revlog
1226 chainbase = revlog.chainbase(base)
1225 chainbase = revlog.chainbase(base)
1227 if revlog.delta_config.general_delta:
1226 if revlog.delta_config.general_delta:
1228 deltabase = base
1227 deltabase = base
1229 else:
1228 else:
1230 if target_rev is not None and base != target_rev - 1:
1229 if target_rev is not None and base != target_rev - 1:
1231 msg = (
1230 msg = (
1232 b'general delta cannot use delta for something else '
1231 b'general delta cannot use delta for something else '
1233 b'than `prev`: %d<-%d'
1232 b'than `prev`: %d<-%d'
1234 )
1233 )
1235 msg %= (base, target_rev)
1234 msg %= (base, target_rev)
1236 raise error.ProgrammingError(msg)
1235 raise error.ProgrammingError(msg)
1237 deltabase = chainbase
1236 deltabase = chainbase
1238 snapshotdepth = None
1237 snapshotdepth = None
1239 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1238 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1240 snapshotdepth = 0
1239 snapshotdepth = 0
1241 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1240 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1242 # A delta chain should always be one full snapshot,
1241 # A delta chain should always be one full snapshot,
1243 # zero or more semi-snapshots, and zero or more deltas
1242 # zero or more semi-snapshots, and zero or more deltas
1244 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1243 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1245 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1244 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1246 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1245 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1247 delta = None
1246 delta = None
1248 if revinfo.cachedelta:
1247 if revinfo.cachedelta:
1249 cachebase = revinfo.cachedelta[0]
1248 cachebase = revinfo.cachedelta[0]
1250 # check if the diff still apply
1249 # check if the diff still apply
1251 currentbase = cachebase
1250 currentbase = cachebase
1252 while (
1251 while (
1253 currentbase != nullrev
1252 currentbase != nullrev
1254 and currentbase != base
1253 and currentbase != base
1255 and self.revlog.length(currentbase) == 0
1254 and self.revlog.length(currentbase) == 0
1256 ):
1255 ):
1257 currentbase = self.revlog.deltaparent(currentbase)
1256 currentbase = self.revlog.deltaparent(currentbase)
1258 if self.revlog.delta_config.lazy_delta and currentbase == base:
1257 if self.revlog.delta_config.lazy_delta and currentbase == base:
1259 delta = revinfo.cachedelta[1]
1258 delta = revinfo.cachedelta[1]
1260 if delta is None:
1259 if delta is None:
1261 delta = self._builddeltadiff(base, revinfo)
1260 delta = self._builddeltadiff(base, revinfo)
1262 if self._debug_search:
1261 if self._debug_search:
1263 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1262 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1264 msg %= len(delta)
1263 msg %= len(delta)
1265 self._write_debug(msg)
1264 self._write_debug(msg)
1266 # snapshotdept need to be neither None nor 0 level snapshot
1265 # snapshotdept need to be neither None nor 0 level snapshot
1267 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1266 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1268 lowestrealisticdeltalen = (
1267 lowestrealisticdeltalen = (
1269 len(delta) // revlog.delta_config.upper_bound_comp
1268 len(delta) // revlog.delta_config.upper_bound_comp
1270 )
1269 )
1271 snapshotlimit = revinfo.textlen >> snapshotdepth
1270 snapshotlimit = revinfo.textlen >> snapshotdepth
1272 if self._debug_search:
1271 if self._debug_search:
1273 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1272 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1274 msg %= lowestrealisticdeltalen
1273 msg %= lowestrealisticdeltalen
1275 self._write_debug(msg)
1274 self._write_debug(msg)
1276 if snapshotlimit < lowestrealisticdeltalen:
1275 if snapshotlimit < lowestrealisticdeltalen:
1277 if self._debug_search:
1276 if self._debug_search:
1278 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1277 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1279 self._write_debug(msg)
1278 self._write_debug(msg)
1280 return None
1279 return None
1281 if revlog.length(base) < lowestrealisticdeltalen:
1280 if revlog.length(base) < lowestrealisticdeltalen:
1282 if self._debug_search:
1281 if self._debug_search:
1283 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1282 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1284 self._write_debug(msg)
1283 self._write_debug(msg)
1285 return None
1284 return None
1286 header, data = revlog._inner.compress(delta)
1285 header, data = revlog._inner.compress(delta)
1287 deltalen = len(header) + len(data)
1286 deltalen = len(header) + len(data)
1288 offset = revlog.end(len(revlog) - 1)
1287 offset = revlog.end(len(revlog) - 1)
1289 dist = deltalen + offset - revlog.start(chainbase)
1288 dist = deltalen + offset - revlog.start(chainbase)
1290 chainlen, compresseddeltalen = revlog._chaininfo(base)
1289 chainlen, compresseddeltalen = revlog._chaininfo(base)
1291 chainlen += 1
1290 chainlen += 1
1292 compresseddeltalen += deltalen
1291 compresseddeltalen += deltalen
1293
1292
1294 return _deltainfo(
1293 return _deltainfo(
1295 dist,
1294 dist,
1296 deltalen,
1295 deltalen,
1297 (header, data),
1296 (header, data),
1298 deltabase,
1297 deltabase,
1299 chainbase,
1298 chainbase,
1300 chainlen,
1299 chainlen,
1301 compresseddeltalen,
1300 compresseddeltalen,
1302 snapshotdepth,
1301 snapshotdepth,
1303 )
1302 )
1304
1303
1305 def _fullsnapshotinfo(self, revinfo, curr):
1304 def _fullsnapshotinfo(self, revinfo, curr):
1306 rawtext = self.buildtext(revinfo)
1305 rawtext = self.buildtext(revinfo)
1307 data = self.revlog._inner.compress(rawtext)
1306 data = self.revlog._inner.compress(rawtext)
1308 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1307 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1309 deltabase = chainbase = curr
1308 deltabase = chainbase = curr
1310 snapshotdepth = 0
1309 snapshotdepth = 0
1311 chainlen = 1
1310 chainlen = 1
1312
1311
1313 return _deltainfo(
1312 return _deltainfo(
1314 dist,
1313 dist,
1315 deltalen,
1314 deltalen,
1316 data,
1315 data,
1317 deltabase,
1316 deltabase,
1318 chainbase,
1317 chainbase,
1319 chainlen,
1318 chainlen,
1320 compresseddeltalen,
1319 compresseddeltalen,
1321 snapshotdepth,
1320 snapshotdepth,
1322 )
1321 )
1323
1322
1324 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1323 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1325 """Find an acceptable delta against a candidate revision
1324 """Find an acceptable delta against a candidate revision
1326
1325
1327 revinfo: information about the revision (instance of _revisioninfo)
1326 revinfo: information about the revision (instance of _revisioninfo)
1328
1327
1329 Returns the first acceptable candidate revision, as ordered by
1328 Returns the first acceptable candidate revision, as ordered by
1330 _candidategroups
1329 _candidategroups
1331
1330
1332 If no suitable deltabase is found, we return delta info for a full
1331 If no suitable deltabase is found, we return delta info for a full
1333 snapshot.
1332 snapshot.
1334
1333
1335 `excluded_bases` is an optional set of revision that cannot be used as
1334 `excluded_bases` is an optional set of revision that cannot be used as
1336 a delta base. Use this to recompute delta suitable in censor or strip
1335 a delta base. Use this to recompute delta suitable in censor or strip
1337 context.
1336 context.
1338 """
1337 """
1339 if target_rev is None:
1338 if target_rev is None:
1340 target_rev = len(self.revlog)
1339 target_rev = len(self.revlog)
1341
1340
1342 gather_debug = self._gather_debug
1341 gather_debug = self._gather_debug
1343 cachedelta = revinfo.cachedelta
1342 cachedelta = revinfo.cachedelta
1344 revlog = self.revlog
1343 revlog = self.revlog
1345 p1r = p2r = None
1344 p1r = p2r = None
1346
1345
1347 if excluded_bases is None:
1346 if excluded_bases is None:
1348 excluded_bases = set()
1347 excluded_bases = set()
1349
1348
1350 if gather_debug:
1349 if gather_debug:
1351 start = util.timer()
1350 start = util.timer()
1352 dbg = self._one_dbg_data()
1351 dbg = self._one_dbg_data()
1353 dbg['revision'] = target_rev
1352 dbg['revision'] = target_rev
1354 p1r = revlog.rev(revinfo.p1)
1353 p1r = revlog.rev(revinfo.p1)
1355 p2r = revlog.rev(revinfo.p2)
1354 p2r = revlog.rev(revinfo.p2)
1356 if p1r != nullrev:
1355 if p1r != nullrev:
1357 p1_chain_len = revlog._chaininfo(p1r)[0]
1356 p1_chain_len = revlog._chaininfo(p1r)[0]
1358 else:
1357 else:
1359 p1_chain_len = -1
1358 p1_chain_len = -1
1360 if p2r != nullrev:
1359 if p2r != nullrev:
1361 p2_chain_len = revlog._chaininfo(p2r)[0]
1360 p2_chain_len = revlog._chaininfo(p2r)[0]
1362 else:
1361 else:
1363 p2_chain_len = -1
1362 p2_chain_len = -1
1364 dbg['p1-chain-len'] = p1_chain_len
1363 dbg['p1-chain-len'] = p1_chain_len
1365 dbg['p2-chain-len'] = p2_chain_len
1364 dbg['p2-chain-len'] = p2_chain_len
1366
1365
1367 # 1) if the revision is empty, no amount of delta can beat it
1366 # 1) if the revision is empty, no amount of delta can beat it
1368 #
1367 #
1369 # 2) no delta for flag processor revision (see "candelta" for why)
1368 # 2) no delta for flag processor revision (see "candelta" for why)
1370 # not calling candelta since only one revision needs test, also to
1369 # not calling candelta since only one revision needs test, also to
1371 # avoid overhead fetching flags again.
1370 # avoid overhead fetching flags again.
1372 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1371 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1373 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1372 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1374 if gather_debug:
1373 if gather_debug:
1375 end = util.timer()
1374 end = util.timer()
1376 dbg['duration'] = end - start
1375 dbg['duration'] = end - start
1377 dbg[
1376 dbg[
1378 'delta-base'
1377 'delta-base'
1379 ] = deltainfo.base # pytype: disable=attribute-error
1378 ] = deltainfo.base # pytype: disable=attribute-error
1380 dbg['search_round_count'] = 0
1379 dbg['search_round_count'] = 0
1381 dbg['using-cached-base'] = False
1380 dbg['using-cached-base'] = False
1382 dbg['delta_try_count'] = 0
1381 dbg['delta_try_count'] = 0
1383 dbg['type'] = b"full"
1382 dbg['type'] = b"full"
1384 dbg['snapshot-depth'] = 0
1383 dbg['snapshot-depth'] = 0
1385 self._dbg_process_data(dbg)
1384 self._dbg_process_data(dbg)
1386 return deltainfo
1385 return deltainfo
1387
1386
1388 deltainfo = None
1387 deltainfo = None
1389
1388
1390 # If this source delta are to be forcibly reuse, let us comply early.
1389 # If this source delta are to be forcibly reuse, let us comply early.
1391 if (
1390 if (
1392 revlog.delta_config.general_delta
1391 revlog.delta_config.general_delta
1393 and revinfo.cachedelta is not None
1392 and revinfo.cachedelta is not None
1394 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1393 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1395 ):
1394 ):
1396 base = revinfo.cachedelta[0]
1395 base = revinfo.cachedelta[0]
1397 if base == nullrev:
1396 if base == nullrev:
1398 dbg_type = b"full"
1397 dbg_type = b"full"
1399 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1398 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1400 if gather_debug:
1399 if gather_debug:
1401 snapshotdepth = 0
1400 snapshotdepth = 0
1402 elif base not in excluded_bases:
1401 elif base not in excluded_bases:
1403 delta = revinfo.cachedelta[1]
1402 delta = revinfo.cachedelta[1]
1404 header, data = revlog.compress(delta)
1403 header, data = revlog.compress(delta)
1405 deltalen = len(header) + len(data)
1404 deltalen = len(header) + len(data)
1406 if gather_debug:
1405 if gather_debug:
1407 offset = revlog.end(len(revlog) - 1)
1406 offset = revlog.end(len(revlog) - 1)
1408 chainbase = revlog.chainbase(base)
1407 chainbase = revlog.chainbase(base)
1409 distance = deltalen + offset - revlog.start(chainbase)
1408 distance = deltalen + offset - revlog.start(chainbase)
1410 chainlen, compresseddeltalen = revlog._chaininfo(base)
1409 chainlen, compresseddeltalen = revlog._chaininfo(base)
1411 chainlen += 1
1410 chainlen += 1
1412 compresseddeltalen += deltalen
1411 compresseddeltalen += deltalen
1413 if base == p1r or base == p2r:
1412 if base == p1r or base == p2r:
1414 dbg_type = b"delta"
1413 dbg_type = b"delta"
1415 snapshotdepth = None
1414 snapshotdepth = None
1416 elif not revlog.issnapshot(base):
1415 elif not revlog.issnapshot(base):
1417 snapshotdepth = None
1416 snapshotdepth = None
1418 else:
1417 else:
1419 dbg_type = b"snapshot"
1418 dbg_type = b"snapshot"
1420 snapshotdepth = revlog.snapshotdepth(base) + 1
1419 snapshotdepth = revlog.snapshotdepth(base) + 1
1421 else:
1420 else:
1422 distance = None
1421 distance = None
1423 chainbase = None
1422 chainbase = None
1424 chainlen = None
1423 chainlen = None
1425 compresseddeltalen = None
1424 compresseddeltalen = None
1426 snapshotdepth = None
1425 snapshotdepth = None
1427 deltainfo = _deltainfo(
1426 deltainfo = _deltainfo(
1428 distance=distance,
1427 distance=distance,
1429 deltalen=deltalen,
1428 deltalen=deltalen,
1430 data=(header, data),
1429 data=(header, data),
1431 base=base,
1430 base=base,
1432 chainbase=chainbase,
1431 chainbase=chainbase,
1433 chainlen=chainlen,
1432 chainlen=chainlen,
1434 compresseddeltalen=compresseddeltalen,
1433 compresseddeltalen=compresseddeltalen,
1435 snapshotdepth=snapshotdepth,
1434 snapshotdepth=snapshotdepth,
1436 )
1435 )
1437
1436
1438 if deltainfo is not None:
1437 if deltainfo is not None:
1439 if gather_debug:
1438 if gather_debug:
1440 end = util.timer()
1439 end = util.timer()
1441 dbg['duration'] = end - start
1440 dbg['duration'] = end - start
1442 dbg[
1441 dbg[
1443 'delta-base'
1442 'delta-base'
1444 ] = deltainfo.base # pytype: disable=attribute-error
1443 ] = deltainfo.base # pytype: disable=attribute-error
1445 dbg['search_round_count'] = 0
1444 dbg['search_round_count'] = 0
1446 dbg['using-cached-base'] = True
1445 dbg['using-cached-base'] = True
1447 dbg['delta_try_count'] = 0
1446 dbg['delta_try_count'] = 0
1448 dbg['type'] = b"full"
1447 dbg['type'] = b"full"
1449 if snapshotdepth is None:
1448 if snapshotdepth is None:
1450 dbg['snapshot-depth'] = -1
1449 dbg['snapshot-depth'] = -1
1451 else:
1450 else:
1452 dbg['snapshot-depth'] = snapshotdepth
1451 dbg['snapshot-depth'] = snapshotdepth
1453 self._dbg_process_data(dbg)
1452 self._dbg_process_data(dbg)
1454 return deltainfo
1453 return deltainfo
1455
1454
1456 # count the number of different delta we tried (for debug purpose)
1455 # count the number of different delta we tried (for debug purpose)
1457 dbg_try_count = 0
1456 dbg_try_count = 0
1458 # count the number of "search round" we did. (for debug purpose)
1457 # count the number of "search round" we did. (for debug purpose)
1459 dbg_try_rounds = 0
1458 dbg_try_rounds = 0
1460 dbg_type = b'unknown'
1459 dbg_type = b'unknown'
1461
1460
1462 if p1r is None:
1461 if p1r is None:
1463 p1r = revlog.rev(revinfo.p1)
1462 p1r = revlog.rev(revinfo.p1)
1464 p2r = revlog.rev(revinfo.p2)
1463 p2r = revlog.rev(revinfo.p2)
1465
1464
1466 if self._debug_search:
1465 if self._debug_search:
1467 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1466 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1468 msg %= target_rev
1467 msg %= target_rev
1469 self._write_debug(msg)
1468 self._write_debug(msg)
1470
1469
1471 search = _DeltaSearch(
1470 search = _DeltaSearch(
1472 self.revlog,
1471 self.revlog,
1473 revinfo,
1472 revinfo,
1474 p1r,
1473 p1r,
1475 p2r,
1474 p2r,
1476 cachedelta,
1475 cachedelta,
1477 excluded_bases,
1476 excluded_bases,
1478 target_rev,
1477 target_rev,
1479 snapshot_cache=self._snapshot_cache,
1478 snapshot_cache=self._snapshot_cache,
1480 )
1479 )
1481
1480
1482 while not search.done:
1481 while not search.done:
1483 current_group = search.current_group
1482 current_group = search.current_group
1484 # current_group can be `None`, but not is search.done is False
1483 # current_group can be `None`, but not is search.done is False
1485 # We add this assert to help pytype
1484 # We add this assert to help pytype
1486 assert current_group is not None
1485 assert current_group is not None
1487 candidaterevs = current_group
1486 candidaterevs = current_group
1488 dbg_try_rounds += 1
1487 dbg_try_rounds += 1
1489 if self._debug_search:
1488 if self._debug_search:
1490 prev = None
1489 prev = None
1491 if deltainfo is not None:
1490 if deltainfo is not None:
1492 prev = deltainfo.base
1491 prev = deltainfo.base
1493
1492
1494 if (
1493 if (
1495 cachedelta is not None
1494 cachedelta is not None
1496 and len(candidaterevs) == 1
1495 and len(candidaterevs) == 1
1497 and cachedelta[0] in candidaterevs
1496 and cachedelta[0] in candidaterevs
1498 ):
1497 ):
1499 round_type = b"cached-delta"
1498 round_type = b"cached-delta"
1500 elif p1r in candidaterevs or p2r in candidaterevs:
1499 elif p1r in candidaterevs or p2r in candidaterevs:
1501 round_type = b"parents"
1500 round_type = b"parents"
1502 elif prev is not None and all(c < prev for c in candidaterevs):
1501 elif prev is not None and all(c < prev for c in candidaterevs):
1503 round_type = b"refine-down"
1502 round_type = b"refine-down"
1504 elif prev is not None and all(c > prev for c in candidaterevs):
1503 elif prev is not None and all(c > prev for c in candidaterevs):
1505 round_type = b"refine-up"
1504 round_type = b"refine-up"
1506 else:
1505 else:
1507 round_type = b"search-down"
1506 round_type = b"search-down"
1508 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1507 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1509 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1508 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1510 self._write_debug(msg)
1509 self._write_debug(msg)
1511 nominateddeltas = []
1510 nominateddeltas = []
1512 if deltainfo is not None:
1511 if deltainfo is not None:
1513 if self._debug_search:
1512 if self._debug_search:
1514 msg = (
1513 msg = (
1515 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1514 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1516 )
1515 )
1517 msg %= (deltainfo.base, deltainfo.deltalen)
1516 msg %= (deltainfo.base, deltainfo.deltalen)
1518 self._write_debug(msg)
1517 self._write_debug(msg)
1519 # if we already found a good delta,
1518 # if we already found a good delta,
1520 # challenge it against refined candidates
1519 # challenge it against refined candidates
1521 nominateddeltas.append(deltainfo)
1520 nominateddeltas.append(deltainfo)
1522 for candidaterev in candidaterevs:
1521 for candidaterev in candidaterevs:
1523 if self._debug_search:
1522 if self._debug_search:
1524 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1523 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1525 msg %= candidaterev
1524 msg %= candidaterev
1526 self._write_debug(msg)
1525 self._write_debug(msg)
1527 candidate_type = None
1526 candidate_type = None
1528 if candidaterev == p1r:
1527 if candidaterev == p1r:
1529 candidate_type = b"p1"
1528 candidate_type = b"p1"
1530 elif candidaterev == p2r:
1529 elif candidaterev == p2r:
1531 candidate_type = b"p2"
1530 candidate_type = b"p2"
1532 elif self.revlog.issnapshot(candidaterev):
1531 elif self.revlog.issnapshot(candidaterev):
1533 candidate_type = b"snapshot-%d"
1532 candidate_type = b"snapshot-%d"
1534 candidate_type %= self.revlog.snapshotdepth(
1533 candidate_type %= self.revlog.snapshotdepth(
1535 candidaterev
1534 candidaterev
1536 )
1535 )
1537
1536
1538 if candidate_type is not None:
1537 if candidate_type is not None:
1539 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1538 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1540 msg %= candidate_type
1539 msg %= candidate_type
1541 self._write_debug(msg)
1540 self._write_debug(msg)
1542 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1541 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1543 msg %= self.revlog.length(candidaterev)
1542 msg %= self.revlog.length(candidaterev)
1544 self._write_debug(msg)
1543 self._write_debug(msg)
1545 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1544 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1546 msg %= self.revlog.deltaparent(candidaterev)
1545 msg %= self.revlog.deltaparent(candidaterev)
1547 self._write_debug(msg)
1546 self._write_debug(msg)
1548
1547
1549 dbg_try_count += 1
1548 dbg_try_count += 1
1550
1549
1551 if self._debug_search:
1550 if self._debug_search:
1552 delta_start = util.timer()
1551 delta_start = util.timer()
1553 candidatedelta = self._builddeltainfo(
1552 candidatedelta = self._builddeltainfo(
1554 revinfo,
1553 revinfo,
1555 candidaterev,
1554 candidaterev,
1556 target_rev=target_rev,
1555 target_rev=target_rev,
1557 )
1556 )
1558 if self._debug_search:
1557 if self._debug_search:
1559 delta_end = util.timer()
1558 delta_end = util.timer()
1560 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1559 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1561 msg %= delta_end - delta_start
1560 msg %= delta_end - delta_start
1562 self._write_debug(msg)
1561 self._write_debug(msg)
1563 if candidatedelta is not None:
1562 if candidatedelta is not None:
1564 if search.is_good_delta_info(candidatedelta):
1563 if search.is_good_delta_info(candidatedelta):
1565 if self._debug_search:
1564 if self._debug_search:
1566 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1565 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1567 msg %= candidatedelta.deltalen
1566 msg %= candidatedelta.deltalen
1568 self._write_debug(msg)
1567 self._write_debug(msg)
1569 nominateddeltas.append(candidatedelta)
1568 nominateddeltas.append(candidatedelta)
1570 elif self._debug_search:
1569 elif self._debug_search:
1571 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1570 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1572 msg %= candidatedelta.deltalen
1571 msg %= candidatedelta.deltalen
1573 self._write_debug(msg)
1572 self._write_debug(msg)
1574 elif self._debug_search:
1573 elif self._debug_search:
1575 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1574 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1576 self._write_debug(msg)
1575 self._write_debug(msg)
1577 if nominateddeltas:
1576 if nominateddeltas:
1578 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1577 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1579 search.next_group(deltainfo)
1578 search.next_group(deltainfo)
1580
1579
1581 if deltainfo is None:
1580 if deltainfo is None:
1582 dbg_type = b"full"
1581 dbg_type = b"full"
1583 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1582 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1584 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1583 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1585 dbg_type = b"snapshot"
1584 dbg_type = b"snapshot"
1586 else:
1585 else:
1587 dbg_type = b"delta"
1586 dbg_type = b"delta"
1588
1587
1589 if gather_debug:
1588 if gather_debug:
1590 end = util.timer()
1589 end = util.timer()
1591 if dbg_type == b'full':
1590 if dbg_type == b'full':
1592 used_cached = (
1591 used_cached = (
1593 cachedelta is not None
1592 cachedelta is not None
1594 and dbg_try_rounds == 0
1593 and dbg_try_rounds == 0
1595 and dbg_try_count == 0
1594 and dbg_try_count == 0
1596 and cachedelta[0] == nullrev
1595 and cachedelta[0] == nullrev
1597 )
1596 )
1598 else:
1597 else:
1599 used_cached = (
1598 used_cached = (
1600 cachedelta is not None
1599 cachedelta is not None
1601 and dbg_try_rounds == 1
1600 and dbg_try_rounds == 1
1602 and dbg_try_count == 1
1601 and dbg_try_count == 1
1603 and deltainfo.base == cachedelta[0]
1602 and deltainfo.base == cachedelta[0]
1604 )
1603 )
1605 dbg['duration'] = end - start
1604 dbg['duration'] = end - start
1606 dbg[
1605 dbg[
1607 'delta-base'
1606 'delta-base'
1608 ] = deltainfo.base # pytype: disable=attribute-error
1607 ] = deltainfo.base # pytype: disable=attribute-error
1609 dbg['search_round_count'] = dbg_try_rounds
1608 dbg['search_round_count'] = dbg_try_rounds
1610 dbg['using-cached-base'] = used_cached
1609 dbg['using-cached-base'] = used_cached
1611 dbg['delta_try_count'] = dbg_try_count
1610 dbg['delta_try_count'] = dbg_try_count
1612 dbg['type'] = dbg_type
1611 dbg['type'] = dbg_type
1613 if (
1612 if (
1614 deltainfo.snapshotdepth # pytype: disable=attribute-error
1613 deltainfo.snapshotdepth # pytype: disable=attribute-error
1615 is not None
1614 is not None
1616 ):
1615 ):
1617 dbg[
1616 dbg[
1618 'snapshot-depth'
1617 'snapshot-depth'
1619 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1618 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1620 else:
1619 else:
1621 dbg['snapshot-depth'] = -1
1620 dbg['snapshot-depth'] = -1
1622 self._dbg_process_data(dbg)
1621 self._dbg_process_data(dbg)
1623 return deltainfo
1622 return deltainfo
1624
1623
1625 def _one_dbg_data(self):
1624 def _one_dbg_data(self):
1626 dbg = {
1625 dbg = {
1627 'duration': None,
1626 'duration': None,
1628 'revision': None,
1627 'revision': None,
1629 'delta-base': None,
1628 'delta-base': None,
1630 'search_round_count': None,
1629 'search_round_count': None,
1631 'using-cached-base': None,
1630 'using-cached-base': None,
1632 'delta_try_count': None,
1631 'delta_try_count': None,
1633 'type': None,
1632 'type': None,
1634 'p1-chain-len': None,
1633 'p1-chain-len': None,
1635 'p2-chain-len': None,
1634 'p2-chain-len': None,
1636 'snapshot-depth': None,
1635 'snapshot-depth': None,
1637 'target-revlog': None,
1636 'target-revlog': None,
1638 }
1637 }
1639 target_revlog = b"UNKNOWN"
1638 target_revlog = b"UNKNOWN"
1640 target_type = self.revlog.target[0]
1639 target_type = self.revlog.target[0]
1641 target_key = self.revlog.target[1]
1640 target_key = self.revlog.target[1]
1642 if target_type == KIND_CHANGELOG:
1641 if target_type == KIND_CHANGELOG:
1643 target_revlog = b'CHANGELOG:'
1642 target_revlog = b'CHANGELOG:'
1644 elif target_type == KIND_MANIFESTLOG:
1643 elif target_type == KIND_MANIFESTLOG:
1645 target_revlog = b'MANIFESTLOG:'
1644 target_revlog = b'MANIFESTLOG:'
1646 if target_key:
1645 if target_key:
1647 target_revlog += b'%s:' % target_key
1646 target_revlog += b'%s:' % target_key
1648 elif target_type == KIND_FILELOG:
1647 elif target_type == KIND_FILELOG:
1649 target_revlog = b'FILELOG:'
1648 target_revlog = b'FILELOG:'
1650 if target_key:
1649 if target_key:
1651 target_revlog += b'%s:' % target_key
1650 target_revlog += b'%s:' % target_key
1652 dbg['target-revlog'] = target_revlog
1651 dbg['target-revlog'] = target_revlog
1653 return dbg
1652 return dbg
1654
1653
1655 def _dbg_process_data(self, dbg):
1654 def _dbg_process_data(self, dbg):
1656 if self._debug_info is not None:
1655 if self._debug_info is not None:
1657 self._debug_info.append(dbg)
1656 self._debug_info.append(dbg)
1658
1657
1659 if self._write_debug is not None:
1658 if self._write_debug is not None:
1660 msg = (
1659 msg = (
1661 b"DBG-DELTAS:"
1660 b"DBG-DELTAS:"
1662 b" %-12s"
1661 b" %-12s"
1663 b" rev=%d:"
1662 b" rev=%d:"
1664 b" delta-base=%d"
1663 b" delta-base=%d"
1665 b" is-cached=%d"
1664 b" is-cached=%d"
1666 b" - search-rounds=%d"
1665 b" - search-rounds=%d"
1667 b" try-count=%d"
1666 b" try-count=%d"
1668 b" - delta-type=%-6s"
1667 b" - delta-type=%-6s"
1669 b" snap-depth=%d"
1668 b" snap-depth=%d"
1670 b" - p1-chain-length=%d"
1669 b" - p1-chain-length=%d"
1671 b" p2-chain-length=%d"
1670 b" p2-chain-length=%d"
1672 b" - duration=%f"
1671 b" - duration=%f"
1673 b"\n"
1672 b"\n"
1674 )
1673 )
1675 msg %= (
1674 msg %= (
1676 dbg["target-revlog"],
1675 dbg["target-revlog"],
1677 dbg["revision"],
1676 dbg["revision"],
1678 dbg["delta-base"],
1677 dbg["delta-base"],
1679 dbg["using-cached-base"],
1678 dbg["using-cached-base"],
1680 dbg["search_round_count"],
1679 dbg["search_round_count"],
1681 dbg["delta_try_count"],
1680 dbg["delta_try_count"],
1682 dbg["type"],
1681 dbg["type"],
1683 dbg["snapshot-depth"],
1682 dbg["snapshot-depth"],
1684 dbg["p1-chain-len"],
1683 dbg["p1-chain-len"],
1685 dbg["p2-chain-len"],
1684 dbg["p2-chain-len"],
1686 dbg["duration"],
1685 dbg["duration"],
1687 )
1686 )
1688 self._write_debug(msg)
1687 self._write_debug(msg)
1689
1688
1690
1689
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    if not header and not payload:
        # no data to store at all: declare it uncompressed
        return COMP_MODE_PLAIN, deltainfo
    if not header:
        # no compression header — sniff the payload's first byte
        first = payload[0:1]
        if first == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if first == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # 'u' marks uncompressed data; the PLAIN mode flag is a more
        # efficient way to record that, so strip the marker
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now