##// END OF EJS Templates
delta-find: move the `gather_debug` logic in a property...
marmoute -
r51541:5d210ff4 stable
parent child Browse files
Show More
@@ -1,1530 +1,1532 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_FORCE,
24 DELTA_BASE_REUSE_NO,
24 DELTA_BASE_REUSE_NO,
25 KIND_CHANGELOG,
25 KIND_CHANGELOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_MANIFESTLOG,
27 KIND_MANIFESTLOG,
28 REVIDX_ISCENSORED,
28 REVIDX_ISCENSORED,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 )
30 )
31
31
32 from ..thirdparty import attr
32 from ..thirdparty import attr
33
33
34 from .. import (
34 from .. import (
35 error,
35 error,
36 mdiff,
36 mdiff,
37 util,
37 util,
38 )
38 )
39
39
40 from . import flagutil
40 from . import flagutil
41
41
42 # maximum <delta-chain-data>/<revision-text-length> ratio
42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 LIMIT_DELTA2TEXT = 2
43 LIMIT_DELTA2TEXT = 2
44
44
45
45
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        # cumulative end offsets, one per revision
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both nullrev and the first revision start at offset zero
        if rev == nullrev or rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev is always considered a snapshot
        return rev == nullrev or rev in self._snapshot
79
79
80
80
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # fall back to the pure-python density slicing when the index does not
        # provide a native implementation
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    density_chunks = densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    )
    for chunk in density_chunks:
        # each density-selected chunk may still exceed the read target size
        yield from _slicechunktosize(revlog, chunk, targetsize)
153
153
154
154
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # nothing to do when the whole span already fits the target
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still fits; extend the current chunk
            endrevidx = idx + 1
        else:
            # flush the current chunk and start a new one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the distance to the chunk start until the span fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
308
308
309
309
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    # hoist the bound methods out of the loop below
    start = revlog.start
    length = revlog.length

    # a single revision can never be split
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gaps = []
    prev_end = None
    for i, rev in enumerate(revs):
        rev_start = start(rev)
        rev_len = length(rev)

        # Skip empty revisions to form larger holes
        if rev_len == 0:
            continue

        if prev_end is not None:
            gap_size = rev_start - prev_end
            # only consider holes that are large enough
            if gap_size > mingapsize:
                gaps.append((gap_size, i))

        prev_end = rev_start + rev_len
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gap_size, gap_idx = gaps.pop()

        selected.append(gap_idx)

        # skipping the gap shrinks the amount of data actually read
        readdata -= gap_size
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    prev_idx = 0
    for idx in selected:
        chunk = _trimchunk(revlog, revs, prev_idx, idx)
        if chunk:
            yield chunk
        prev_idx = idx

    # trailing chunk after the last selected cut point
    chunk = _trimchunk(revlog, revs, prev_idx)
    if chunk:
        yield chunk
434
434
435
435
436 def _trimchunk(revlog, revs, startidx, endidx=None):
436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 """returns revs[startidx:endidx] without empty trailing revs
437 """returns revs[startidx:endidx] without empty trailing revs
438
438
439 Doctest Setup
439 Doctest Setup
440 >>> revlog = _testrevlog([
440 >>> revlog = _testrevlog([
441 ... 5, #0
441 ... 5, #0
442 ... 10, #1
442 ... 10, #1
443 ... 12, #2
443 ... 12, #2
444 ... 12, #3 (empty)
444 ... 12, #3 (empty)
445 ... 17, #4
445 ... 17, #4
446 ... 21, #5
446 ... 21, #5
447 ... 21, #6 (empty)
447 ... 21, #6 (empty)
448 ... ])
448 ... ])
449
449
450 Contiguous cases:
450 Contiguous cases:
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 [0, 1, 2, 3, 4, 5]
452 [0, 1, 2, 3, 4, 5]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 [0, 1, 2, 3, 4]
454 [0, 1, 2, 3, 4]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 [0, 1, 2]
456 [0, 1, 2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 [2]
458 [2]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 [3, 4, 5]
460 [3, 4, 5]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 [3, 4]
462 [3, 4]
463
463
464 Discontiguous cases:
464 Discontiguous cases:
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 [1, 3, 5]
466 [1, 3, 5]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 [1]
468 [1]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 [3, 5]
470 [3, 5]
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 [3, 5]
472 [3, 5]
473 """
473 """
474 length = revlog.length
474 length = revlog.length
475
475
476 if endidx is None:
476 if endidx is None:
477 endidx = len(revs)
477 endidx = len(revs)
478
478
479 # If we have a non-emtpy delta candidate, there are nothing to trim
479 # If we have a non-emtpy delta candidate, there are nothing to trim
480 if revs[endidx - 1] < len(revlog):
480 if revs[endidx - 1] < len(revlog):
481 # Trim empty revs at the end, except the very first revision of a chain
481 # Trim empty revs at the end, except the very first revision of a chain
482 while (
482 while (
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 ):
484 ):
485 endidx -= 1
485 endidx -= 1
486
486
487 return revs[startidx:endidx]
487 return revs[startidx:endidx]
488
488
489
489
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # distance from the first byte of the first rev to the last byte of the
    # last rev
    return revlog.end(revs[-1]) - revlog.start(revs[0])
518
518
519
519
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    )
    if delta[:hlen] == replace_header:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
549
549
550
550
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable record describing a candidate delta for a revision."""

    # distance (in bytes) from the base revision on disk; bounding it
    # limits the amount of I/O needed to rebuild the text
    distance = attr.ib()
    # length of the (possibly compressed) delta payload, header included
    deltalen = attr.ib()
    # 2-tuple of (compression header, delta payload) -- see
    # drop_u_compression, which inspects data[0] and data[1]
    data = attr.ib()
    # revision number used as the delta base
    base = attr.ib()
    # first revision of the delta chain this delta extends
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated size of all deltas that must be applied to rebuild the text
    compresseddeltalen = attr.ib()
    # snapshot depth if this revision would be stored as an intermediate
    # snapshot, None otherwise
    snapshotdepth = attr.ib()
561
561
562
562
def drop_u_compression(delta):
    """Turn a "u" (no-compression) delta into a headerless no-compression one.

    This is useful for revlog formats that have a better compression method.
    The input ``delta`` must carry the b'u' compression header; the returned
    ``_deltainfo`` is identical except the header byte is dropped from both
    the data tuple and the recorded delta length.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
579
579
580
580
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good.

    Good means that it is within the disk span, disk size, and chain length
    bounds that we know to be performant. A ``None`` deltainfo is never good.
    A delta whose base was forced by the caller (DELTA_BASE_REUSE_FORCE) is
    always accepted.
    """
    if deltainfo is None:
        return False

    # the caller forcibly requested this exact base: accept unconditionally
    if (
        revinfo.cachedelta is not None
        and deltainfo.base == revinfo.cachedelta[0]
        and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        return True

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
659
659
660
660
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
665
665
666
666
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and
    worthwhile content. See _raw_candidate_groups for details about the group
    order.

    This is a generator used as a coroutine: each yielded tuple is a group of
    candidate base revisions, and the caller sends back the chosen "good"
    base (or None) which drives refinement. A final ``None`` is yielded when
    no candidates remain.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if target_rev is None:
        target_rev = len(revlog)

    if not revlog._generaldelta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return

    if (
        cachedelta is not None
        and nullrev == cachedelta[0]
        and cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        # instructions are to forcibly do a full snapshot
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            if (
                cachedelta is not None
                and rev == cachedelta[0]
                and cachedelta[2] == DELTA_BASE_REUSE_FORCE
            ):
                # instructions are to forcibly consider/use this delta base
                group.append(rev)
                continue

            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        break

    yield None
836
836
837
837
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Yield successively refined groups of candidate delta bases.

    Used as a coroutine: each ``yield`` emits a tuple of candidate revisions
    and receives back the "good" base picked by the caller (or None). The
    cached delta (if reuse is allowed) is tried first, then the raw groups
    from ``_rawgroups``; on sparse revlogs a chosen snapshot base is further
    refined down its delta chain and up through its snapshot children.
    A final ``None`` is yielded when refinement is exhausted.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling reuse source delta is useful
    # when we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
901
901
902
902
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theoretically
    interesting without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    # Why search for delta base if we cannot use a delta base ?
    assert revlog._generaldelta
    # also see issue6056
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    # exclude already lazy tested base if any
    parents = [p for p in (p1, p2) if p != nullrev]

    if not revlog._deltabothparents and len(parents) == 2:
        parents.sort()
        # To minimize the chance of having to build a fulltext,
        # pick first whichever parent is closest to us (max rev)
        yield (parents[1],)
        # then the other one (min rev) if the first did not fit
        yield (parents[0],)
    elif len(parents) > 0:
        # Test all parents (1 or 2), and keep the best candidate
        yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use
        # as a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however we
        # stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for
        # an intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
1023
1023
1024
1024
class SnapshotCache:
    """Cache mapping a delta-base revision to the snapshot revisions above it.

    The cache also remembers the contiguous revision range it has already
    scanned (``_start_rev``..``_end_rev``, inclusive) so that repeated
    `update` calls only have to look at revisions not covered yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # base revision -> set of snapshot revisions built on that base
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the range already scanned (None == never scanned)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        last_rev = len(revlog) - 1
        if start_rev > last_rev:
            return  # nothing to scan, requested range is empty

        if self._start_rev is None:
            # very first scan: cover the whole requested range
            assert self._end_rev is None
            self._update(revlog, start_rev, last_rev)
        elif not (self._start_rev <= start_rev and last_rev <= self._end_rev):
            # only scan the pieces that fall outside the covered range
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < last_rev:
                self._update(revlog, self._end_rev + 1, last_rev)

        # widen the recorded bounds to include what we just scanned
        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = last_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, last_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # the requested range must not overlap the already-covered one
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        found = self.snapshots
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # delegate to the index implementation when one is available
            revlog.index.findsnapshots(found, start_rev, end_rev)
        else:
            get_delta_parent = revlog.deltaparent
            is_snapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if is_snapshot(rev):
                    found[get_delta_parent(rev)].add(rev)
1078
1078
1079
1079
class deltacomputer:
    """Compute and select deltas to store for new revlog revisions.

    Given information about an incoming revision, this class searches the
    candidate delta bases (parents, snapshots, cached delta, ...) and picks
    the smallest acceptable delta, falling back to a full snapshot when no
    suitable base is found.  Optional debug hooks record or print details
    about the search.
    """

    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        # revlog the computed deltas are destined for
        self.revlog = revlog
        # optional callable receiving debug byte-strings (DBG-DELTAS* lines)
        self._write_debug = write_debug
        # when True (and write_debug is set), emit per-round search details
        self._debug_search = debug_search
        # optional list collecting one debug dict per computed delta
        self._debug_info = debug_info
        # lazily-filled cache of known snapshots, shared across searches
        self._snapshot_cache = SnapshotCache()

    @property
    def _gather_debug(self):
        # debug data must be gathered whenever either debug sink is active
        return self._write_debug is not None or self._debug_info is not None

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            # fulltext already computed and cached on the revinfo
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        # apply the cached delta on top of its base and memoize the result
        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw delta bytes of this revision against `base`."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh, target_rev=None):
        """Build a _deltainfo for this revision delta-ed against `base`.

        Returns None when the delta is discarded early (projected size
        makes it unusable for an intermediate snapshot).
        """
        # can we use the cached delta?
        revlog = self.revlog
        debug_search = self._write_debug is not None and self._debug_search
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, only a delta against `prev` is legal
            if target_rev is not None and base != target_rev - 1:
                msg = (
                    b'general delta cannot use delta for something else '
                    b'than `prev`: %d<-%d'
                )
                msg %= (base, target_rev)
                raise error.ProgrammingError(msg)
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still apply
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                # skip over empty deltas in the chain to find the real base
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdept need to be neither None nor 0 level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            # cheap pre-check: can a best-case compression of this delta
            # still satisfy the intermediate-snapshot size constraints?
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def _fullsnapshotinfo(self, fh, revinfo, curr):
        """Build a _deltainfo describing this revision stored as a full
        snapshot (no delta base)."""
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        # a full snapshot is its own base and starts a fresh chain
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revision that cannot be used as
        a delta base. Use this to recompute delta suitable in censor or strip
        context.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        if not revinfo.textlen:
            # empty revision: storing a full (empty) snapshot is trivially best
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        if excluded_bases is None:
            excluded_bases = set()

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        debug_search = self._write_debug is not None and self._debug_search
        gather_debug = self._gather_debug

        if gather_debug:
            start = util.timer()

        # count the number of different delta we tried (for debug purpose)
        dbg_try_count = 0
        # count the number of "search round" we did. (for debug purpose)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        if gather_debug:
            # record parent chain lengths for the final debug report
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # generator of candidate-base groups; we drive it with send() to
        # let it refine around the best base found so far
        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if debug_search:
                # classify this search round for the debug output only
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1 in candidaterevs or p2 in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1:
                        candidate_type = b"p1"
                    elif candidaterev == p2:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(
                    revinfo,
                    candidaterev,
                    fh,
                    target_rev=target_rev,
                )
                if debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                # keep the smallest delta among current best and new tries
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            # did the final choice come straight from the cached delta?
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg = {
                'duration': end - start,
                'revision': target_rev,
                'delta-base': deltainfo.base,  # pytype: disable=attribute-error
                'search_round_count': dbg_try_rounds,
                'using-cached-base': used_cached,
                'delta_try_count': dbg_try_count,
                'type': dbg_type,
                'p1-chain-len': p1_chain_len,
                'p2-chain-len': p2_chain_len,
            }
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            # describe which revlog this delta belongs to in the report
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog

            if self._debug_info is not None:
                self._debug_info.append(dbg)

            if self._write_debug is not None:
                msg = (
                    b"DBG-DELTAS:"
                    b" %-12s"
                    b" rev=%d:"
                    b" delta-base=%d"
                    b" is-cached=%d"
                    b" - search-rounds=%d"
                    b" try-count=%d"
                    b" - delta-type=%-6s"
                    b" snap-depth=%d"
                    b" - p1-chain-length=%d"
                    b" p2-chain-length=%d"
                    b" - duration=%f"
                    b"\n"
                )
                msg %= (
                    dbg["target-revlog"],
                    dbg["revision"],
                    dbg["delta-base"],
                    dbg["using-cached-base"],
                    dbg["search_round_count"],
                    dbg["delta_try_count"],
                    dbg["type"],
                    dbg["snapshot-depth"],
                    dbg["p1-chain-len"],
                    dbg["p2-chain-len"],
                    dbg["duration"],
                )
                self._write_debug(msg)
        return deltainfo
1506
1508
1507
1509
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    if not header and not payload:
        # no data to store at all... declare it uncompressed
        return COMP_MODE_PLAIN, deltainfo
    if not header:
        # no compression header: inspect the payload's first byte
        first_byte = payload[0:1]
        if first_byte == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if first_byte == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # we have a more efficient way to declare uncompressed
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now