##// END OF EJS Templates
delta-find: adjust the moment when we mark something as "tested"...
marmoute -
r50511:4956942c default
parent child Browse files
Show More
@@ -1,1398 +1,1407 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 KIND_CHANGELOG,
23 KIND_CHANGELOG,
24 KIND_FILELOG,
24 KIND_FILELOG,
25 KIND_MANIFESTLOG,
25 KIND_MANIFESTLOG,
26 REVIDX_ISCENSORED,
26 REVIDX_ISCENSORED,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 )
28 )
29
29
30 from ..thirdparty import attr
30 from ..thirdparty import attr
31
31
32 from .. import (
32 from .. import (
33 error,
33 error,
34 mdiff,
34 mdiff,
35 util,
35 util,
36 )
36 )
37
37
38 from . import flagutil
38 from . import flagutil
39
39
40 # maximum <delta-chain-data>/<revision-text-length> ratio
40 # maximum <delta-chain-data>/<revision-text-length> ratio
41 LIMIT_DELTA2TEXT = 2
41 LIMIT_DELTA2TEXT = 2
42
42
43
43
class _testrevlog:
    """A minimal stand-in for a real revlog, for use in doctests."""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """``data`` is a list of cumulative revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # nullrev and the very first revision both start at offset zero
        if rev == nullrev or rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev always counts as a snapshot
        return rev == nullrev or rev in self._snapshot
77
77
78
78
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native implementation available; fall back to the pure-python one

        def densityslicing(x, y, z):
            return _slicechunktodensity(revlog, x, y, z)

    density_chunks = densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    )
    for chunk in density_chunks:
        # each density-selected chunk may still exceed the size target;
        # split it further when needed
        yield from _slicechunktosize(revlog, chunk, targetsize)
151
151
152
152
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected, but
    that are still too large compared to the read guarantee of the revlog.
    This might happen when the "minimal gap size" interrupted the slicing or
    when chains are built in a way that creates large blocks next to each
    other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    if targetsize is None or fullspan <= targetsize:
        # no limit, or the whole span already fits: a single chunk suffices
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget and still a snapshot: grow the current chunk
            endrevidx = idx + 1
        else:
            # flush the accumulated chunk and start a new one at this rev
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits the target
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains past the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
306
306
307
307
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ...     ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single rev (or nothing) is trivially dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # the whole span is smaller than the minimal gap size: no point slicing
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Gather the gaps (with the index of the rev following each gap) so they
    # can later be processed from the largest to the smallest
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap removes its size from the data we would have to
        # read; recompute the density that results
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last selected cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
432
432
433
433
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ...     21, #5
    ...     21, #6 (empty)
    ...     ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    if endidx is None:
        endidx = len(revs)

    # If the last rev is a non-empty delta candidate (a rev number at or past
    # len(revlog), i.e. not stored yet), there is nothing to trim.
    if revs[endidx - 1] < len(revlog):
        length = revlog.length
        # Drop trailing empty revs, but never trim past the very first
        # revision of a chain.
        while endidx > max(startidx, 1) and length(revs[endidx - 1]) == 0:
            endidx -= 1

    return revs[startidx:endidx]
486
486
487
487
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ...     ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment covers no bytes at all
    if not revs:
        return 0
    first_start = revlog.start(revs[0])
    last_end = revlog.end(revs[-1])
    return last_end - first_start
516
516
517
517
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is the file handle used to read the base revision when the delta
    must actually be applied.  ``flags`` are the revision index flags and
    ``expectednode`` is used to validate the resulting text's hash.

    Raises StorageError when ``flags`` claims the revision is censored but
    processing does not confirm it, and re-raises CensoredNodeError unless
    the censored flag was passed.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # the delta is a full-replacement: the text is the delta payload
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # flag processing did not raise CensoredNodeError, so the
            # revision is not actually censored despite its flag
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
547
547
548
548
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Result of computing a candidate delta for a revision."""

    # on-disk span (in bytes) that must be read to restore the chain;
    # bounding it limits I/O (see isgooddeltainfo)
    distance = attr.ib()
    # size of the new delta (including any compression header byte)
    deltalen = attr.ib()
    # the delta payload as a (header, compressed-data) pair
    # (see drop_u_compression for the b'u' uncompressed marker case)
    data = attr.ib()
    # revision number this delta is computed against
    base = attr.ib()
    # first revision of the delta chain this delta would extend
    chainbase = attr.ib()
    # length of the resulting delta chain
    chainlen = attr.ib()
    # cumulated size of all deltas in the chain; bounding it limits CPU
    compresseddeltalen = attr.ib()
    # snapshot depth if the result is an intermediate snapshot, else None
    snapshotdepth = attr.ib()
559
559
560
560
def drop_u_compression(delta):
    """strip the b'u' (stored-uncompressed) marker from a delta's payload

    This is useful for revlog format that has better compression method.
    """
    header, payload = delta.data
    # only deltas explicitly stored uncompressed may be stripped
    assert header == b'u', header
    return _deltainfo(
        distance=delta.distance,
        # one byte shorter: the b'u' marker is gone
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
577
577
578
578
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    textlen = revinfo.textlen

    # maximum accepted on-disk span; when no limit is configured we pick
    # the delta's own distance so the span check passes by construction
    span_cap = revlog._maxdeltachainspan or deltainfo.distance
    span_cap = max(span_cap, textlen * 4)

    # Bad delta from read span:
    #
    # The span of data read would be larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and span_cap < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # A delta larger than the target text is inefficient storage.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The sum of the chain's deltas grew past K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # The number of deltas in the chain got too high.
    chain_cap = revlog._maxchainlen
    if chain_cap and chain_cap < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # The limit exists to prevent endless chains of intermediate deltas
    # from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and (textlen >> depth) < deltainfo.deltalen:
        return False

    # bad delta if the new intermediate snapshot is larger than the
    # previous snapshot it is based on
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
650
650
651
651
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
656
656
657
657
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    Used coroutine-style: each ``send(good)`` reports which revision (if
    any) from the previous group was retained and produces the next tuple
    of candidate base revisions; a final ``None`` marks exhaustion.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # Revisions we no longer need to consider. A revision is only marked
    # "tested" once we know it is skipped or actually handed out below.
    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            tested.update(group)
            good = yield tuple(group)
    yield None
770
779
771
780
def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip"""
    if not util.safehasattr(revlog.index, b'findsnapshots'):
        # pure-Python fallback: walk every revision from start_rev and
        # record each snapshot under its delta parent
        deltaparent = revlog.deltaparent
        issnapshot = revlog.issnapshot
        for rev in revlog.revs(start_rev):
            if issnapshot(rev):
                cache[deltaparent(rev)].append(rev)
    else:
        # the native index implementation fills the cache directly
        revlog.index.findsnapshots(cache, start_rev)
782
791
783
792
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit candidate-base groups, refining around an accepted base.

    Generator driven through ``send()``: yields tuples of candidate base
    revisions and receives the revision actually retained (or None).
    A trailing ``None`` signals that no more candidates will be emitted.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    # XXX cache me higher
    snapshots = collections.defaultdict(list)
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshots,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk the delta-parent chain of the
        # accepted snapshot, offering each ancestor as a base
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the snapshot's children as bases
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
856
848
857
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without considering practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: `parents` is only bound when gdelta is true; `sparse` implies
    # gdelta (see above), so this access is safe.
    if sparse and parents:
        if snapshots is None:
            # map: base-rev: [snapshot-revs]
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chance
                # to be valid bases since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
977
969
978
970 class deltacomputer:
979 class deltacomputer:
    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        """Initialize a delta computer bound to ``revlog``.

        write_debug:  optional callable used to emit debug output
        debug_search: when True (and write_debug is set), trace the
                      delta-base search itself
        debug_info:   optional mutable mapping collecting statistics
        """
        self.revlog = revlog
        self._write_debug = write_debug
        self._debug_search = debug_search
        self._debug_info = debug_info
991
    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        The result is memoized in-place in ``revinfo.btext[0]`` so
        repeated calls for the same revision are free.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            # already computed for this revision
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        # expand the (base, delta) pair into the fulltext and cache it
        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext
1010
1019
1011 def _builddeltadiff(self, base, revinfo, fh):
1020 def _builddeltadiff(self, base, revinfo, fh):
1012 revlog = self.revlog
1021 revlog = self.revlog
1013 t = self.buildtext(revinfo, fh)
1022 t = self.buildtext(revinfo, fh)
1014 if revlog.iscensored(base):
1023 if revlog.iscensored(base):
1015 # deltas based on a censored revision must replace the
1024 # deltas based on a censored revision must replace the
1016 # full content in one patch, so delta works everywhere
1025 # full content in one patch, so delta works everywhere
1017 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1026 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1018 delta = header + t
1027 delta = header + t
1019 else:
1028 else:
1020 ptext = revlog.rawdata(base, _df=fh)
1029 ptext = revlog.rawdata(base, _df=fh)
1021 delta = mdiff.textdiff(ptext, t)
1030 delta = mdiff.textdiff(ptext, t)
1022
1031
1023 return delta
1032 return delta
1024
1033
1025 def _builddeltainfo(self, revinfo, base, fh):
1034 def _builddeltainfo(self, revinfo, base, fh):
1026 # can we use the cached delta?
1035 # can we use the cached delta?
1027 revlog = self.revlog
1036 revlog = self.revlog
1028 debug_search = self._write_debug is not None and self._debug_search
1037 debug_search = self._write_debug is not None and self._debug_search
1029 chainbase = revlog.chainbase(base)
1038 chainbase = revlog.chainbase(base)
1030 if revlog._generaldelta:
1039 if revlog._generaldelta:
1031 deltabase = base
1040 deltabase = base
1032 else:
1041 else:
1033 deltabase = chainbase
1042 deltabase = chainbase
1034 snapshotdepth = None
1043 snapshotdepth = None
1035 if revlog._sparserevlog and deltabase == nullrev:
1044 if revlog._sparserevlog and deltabase == nullrev:
1036 snapshotdepth = 0
1045 snapshotdepth = 0
1037 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1046 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1038 # A delta chain should always be one full snapshot,
1047 # A delta chain should always be one full snapshot,
1039 # zero or more semi-snapshots, and zero or more deltas
1048 # zero or more semi-snapshots, and zero or more deltas
1040 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1049 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1041 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1050 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1042 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1051 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1043 delta = None
1052 delta = None
1044 if revinfo.cachedelta:
1053 if revinfo.cachedelta:
1045 cachebase, cachediff = revinfo.cachedelta
1054 cachebase, cachediff = revinfo.cachedelta
1046 # check if the diff still apply
1055 # check if the diff still apply
1047 currentbase = cachebase
1056 currentbase = cachebase
1048 while (
1057 while (
1049 currentbase != nullrev
1058 currentbase != nullrev
1050 and currentbase != base
1059 and currentbase != base
1051 and self.revlog.length(currentbase) == 0
1060 and self.revlog.length(currentbase) == 0
1052 ):
1061 ):
1053 currentbase = self.revlog.deltaparent(currentbase)
1062 currentbase = self.revlog.deltaparent(currentbase)
1054 if self.revlog._lazydelta and currentbase == base:
1063 if self.revlog._lazydelta and currentbase == base:
1055 delta = revinfo.cachedelta[1]
1064 delta = revinfo.cachedelta[1]
1056 if delta is None:
1065 if delta is None:
1057 delta = self._builddeltadiff(base, revinfo, fh)
1066 delta = self._builddeltadiff(base, revinfo, fh)
1058 if debug_search:
1067 if debug_search:
1059 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1068 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1060 msg %= len(delta)
1069 msg %= len(delta)
1061 self._write_debug(msg)
1070 self._write_debug(msg)
1062 # snapshotdept need to be neither None nor 0 level snapshot
1071 # snapshotdept need to be neither None nor 0 level snapshot
1063 if revlog.upperboundcomp is not None and snapshotdepth:
1072 if revlog.upperboundcomp is not None and snapshotdepth:
1064 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1073 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1065 snapshotlimit = revinfo.textlen >> snapshotdepth
1074 snapshotlimit = revinfo.textlen >> snapshotdepth
1066 if debug_search:
1075 if debug_search:
1067 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1076 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1068 msg %= lowestrealisticdeltalen
1077 msg %= lowestrealisticdeltalen
1069 self._write_debug(msg)
1078 self._write_debug(msg)
1070 if snapshotlimit < lowestrealisticdeltalen:
1079 if snapshotlimit < lowestrealisticdeltalen:
1071 if debug_search:
1080 if debug_search:
1072 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1081 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1073 self._write_debug(msg)
1082 self._write_debug(msg)
1074 return None
1083 return None
1075 if revlog.length(base) < lowestrealisticdeltalen:
1084 if revlog.length(base) < lowestrealisticdeltalen:
1076 if debug_search:
1085 if debug_search:
1077 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1086 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1078 self._write_debug(msg)
1087 self._write_debug(msg)
1079 return None
1088 return None
1080 header, data = revlog.compress(delta)
1089 header, data = revlog.compress(delta)
1081 deltalen = len(header) + len(data)
1090 deltalen = len(header) + len(data)
1082 offset = revlog.end(len(revlog) - 1)
1091 offset = revlog.end(len(revlog) - 1)
1083 dist = deltalen + offset - revlog.start(chainbase)
1092 dist = deltalen + offset - revlog.start(chainbase)
1084 chainlen, compresseddeltalen = revlog._chaininfo(base)
1093 chainlen, compresseddeltalen = revlog._chaininfo(base)
1085 chainlen += 1
1094 chainlen += 1
1086 compresseddeltalen += deltalen
1095 compresseddeltalen += deltalen
1087
1096
1088 return _deltainfo(
1097 return _deltainfo(
1089 dist,
1098 dist,
1090 deltalen,
1099 deltalen,
1091 (header, data),
1100 (header, data),
1092 deltabase,
1101 deltabase,
1093 chainbase,
1102 chainbase,
1094 chainlen,
1103 chainlen,
1095 compresseddeltalen,
1104 compresseddeltalen,
1096 snapshotdepth,
1105 snapshotdepth,
1097 )
1106 )
1098
1107
1099 def _fullsnapshotinfo(self, fh, revinfo, curr):
1108 def _fullsnapshotinfo(self, fh, revinfo, curr):
1100 rawtext = self.buildtext(revinfo, fh)
1109 rawtext = self.buildtext(revinfo, fh)
1101 data = self.revlog.compress(rawtext)
1110 data = self.revlog.compress(rawtext)
1102 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1111 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1103 deltabase = chainbase = curr
1112 deltabase = chainbase = curr
1104 snapshotdepth = 0
1113 snapshotdepth = 0
1105 chainlen = 1
1114 chainlen = 1
1106
1115
1107 return _deltainfo(
1116 return _deltainfo(
1108 dist,
1117 dist,
1109 deltalen,
1118 deltalen,
1110 data,
1119 data,
1111 deltabase,
1120 deltabase,
1112 chainbase,
1121 chainbase,
1113 chainlen,
1122 chainlen,
1114 compresseddeltalen,
1123 compresseddeltalen,
1115 snapshotdepth,
1124 snapshotdepth,
1116 )
1125 )
1117
1126
1118 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1127 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1119 """Find an acceptable delta against a candidate revision
1128 """Find an acceptable delta against a candidate revision
1120
1129
1121 revinfo: information about the revision (instance of _revisioninfo)
1130 revinfo: information about the revision (instance of _revisioninfo)
1122 fh: file handle to either the .i or the .d revlog file,
1131 fh: file handle to either the .i or the .d revlog file,
1123 depending on whether it is inlined or not
1132 depending on whether it is inlined or not
1124
1133
1125 Returns the first acceptable candidate revision, as ordered by
1134 Returns the first acceptable candidate revision, as ordered by
1126 _candidategroups
1135 _candidategroups
1127
1136
1128 If no suitable deltabase is found, we return delta info for a full
1137 If no suitable deltabase is found, we return delta info for a full
1129 snapshot.
1138 snapshot.
1130
1139
1131 `excluded_bases` is an optional set of revision that cannot be used as
1140 `excluded_bases` is an optional set of revision that cannot be used as
1132 a delta base. Use this to recompute delta suitable in censor or strip
1141 a delta base. Use this to recompute delta suitable in censor or strip
1133 context.
1142 context.
1134 """
1143 """
1135 if target_rev is None:
1144 if target_rev is None:
1136 target_rev = len(self.revlog)
1145 target_rev = len(self.revlog)
1137
1146
1138 if not revinfo.textlen:
1147 if not revinfo.textlen:
1139 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1148 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1140
1149
1141 if excluded_bases is None:
1150 if excluded_bases is None:
1142 excluded_bases = set()
1151 excluded_bases = set()
1143
1152
1144 # no delta for flag processor revision (see "candelta" for why)
1153 # no delta for flag processor revision (see "candelta" for why)
1145 # not calling candelta since only one revision needs test, also to
1154 # not calling candelta since only one revision needs test, also to
1146 # avoid overhead fetching flags again.
1155 # avoid overhead fetching flags again.
1147 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1156 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1148 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1157 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1149
1158
1150 gather_debug = (
1159 gather_debug = (
1151 self._write_debug is not None or self._debug_info is not None
1160 self._write_debug is not None or self._debug_info is not None
1152 )
1161 )
1153 debug_search = self._write_debug is not None and self._debug_search
1162 debug_search = self._write_debug is not None and self._debug_search
1154
1163
1155 if gather_debug:
1164 if gather_debug:
1156 start = util.timer()
1165 start = util.timer()
1157
1166
1158 # count the number of different delta we tried (for debug purpose)
1167 # count the number of different delta we tried (for debug purpose)
1159 dbg_try_count = 0
1168 dbg_try_count = 0
1160 # count the number of "search round" we did. (for debug purpose)
1169 # count the number of "search round" we did. (for debug purpose)
1161 dbg_try_rounds = 0
1170 dbg_try_rounds = 0
1162 dbg_type = b'unknown'
1171 dbg_type = b'unknown'
1163
1172
1164 cachedelta = revinfo.cachedelta
1173 cachedelta = revinfo.cachedelta
1165 p1 = revinfo.p1
1174 p1 = revinfo.p1
1166 p2 = revinfo.p2
1175 p2 = revinfo.p2
1167 revlog = self.revlog
1176 revlog = self.revlog
1168
1177
1169 deltainfo = None
1178 deltainfo = None
1170 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1179 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1171
1180
1172 if gather_debug:
1181 if gather_debug:
1173 if p1r != nullrev:
1182 if p1r != nullrev:
1174 p1_chain_len = revlog._chaininfo(p1r)[0]
1183 p1_chain_len = revlog._chaininfo(p1r)[0]
1175 else:
1184 else:
1176 p1_chain_len = -1
1185 p1_chain_len = -1
1177 if p2r != nullrev:
1186 if p2r != nullrev:
1178 p2_chain_len = revlog._chaininfo(p2r)[0]
1187 p2_chain_len = revlog._chaininfo(p2r)[0]
1179 else:
1188 else:
1180 p2_chain_len = -1
1189 p2_chain_len = -1
1181 if debug_search:
1190 if debug_search:
1182 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1191 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1183 msg %= target_rev
1192 msg %= target_rev
1184 self._write_debug(msg)
1193 self._write_debug(msg)
1185
1194
1186 groups = _candidategroups(
1195 groups = _candidategroups(
1187 self.revlog,
1196 self.revlog,
1188 revinfo.textlen,
1197 revinfo.textlen,
1189 p1r,
1198 p1r,
1190 p2r,
1199 p2r,
1191 cachedelta,
1200 cachedelta,
1192 excluded_bases,
1201 excluded_bases,
1193 target_rev,
1202 target_rev,
1194 )
1203 )
1195 candidaterevs = next(groups)
1204 candidaterevs = next(groups)
1196 while candidaterevs is not None:
1205 while candidaterevs is not None:
1197 dbg_try_rounds += 1
1206 dbg_try_rounds += 1
1198 if debug_search:
1207 if debug_search:
1199 prev = None
1208 prev = None
1200 if deltainfo is not None:
1209 if deltainfo is not None:
1201 prev = deltainfo.base
1210 prev = deltainfo.base
1202
1211
1203 if (
1212 if (
1204 cachedelta is not None
1213 cachedelta is not None
1205 and len(candidaterevs) == 1
1214 and len(candidaterevs) == 1
1206 and cachedelta[0] in candidaterevs
1215 and cachedelta[0] in candidaterevs
1207 ):
1216 ):
1208 round_type = b"cached-delta"
1217 round_type = b"cached-delta"
1209 elif p1 in candidaterevs or p2 in candidaterevs:
1218 elif p1 in candidaterevs or p2 in candidaterevs:
1210 round_type = b"parents"
1219 round_type = b"parents"
1211 elif prev is not None and all(c < prev for c in candidaterevs):
1220 elif prev is not None and all(c < prev for c in candidaterevs):
1212 round_type = b"refine-down"
1221 round_type = b"refine-down"
1213 elif prev is not None and all(c > prev for c in candidaterevs):
1222 elif prev is not None and all(c > prev for c in candidaterevs):
1214 round_type = b"refine-up"
1223 round_type = b"refine-up"
1215 else:
1224 else:
1216 round_type = b"search-down"
1225 round_type = b"search-down"
1217 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1226 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1218 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1227 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1219 self._write_debug(msg)
1228 self._write_debug(msg)
1220 nominateddeltas = []
1229 nominateddeltas = []
1221 if deltainfo is not None:
1230 if deltainfo is not None:
1222 if debug_search:
1231 if debug_search:
1223 msg = (
1232 msg = (
1224 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1233 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1225 )
1234 )
1226 msg %= (deltainfo.base, deltainfo.deltalen)
1235 msg %= (deltainfo.base, deltainfo.deltalen)
1227 self._write_debug(msg)
1236 self._write_debug(msg)
1228 # if we already found a good delta,
1237 # if we already found a good delta,
1229 # challenge it against refined candidates
1238 # challenge it against refined candidates
1230 nominateddeltas.append(deltainfo)
1239 nominateddeltas.append(deltainfo)
1231 for candidaterev in candidaterevs:
1240 for candidaterev in candidaterevs:
1232 if debug_search:
1241 if debug_search:
1233 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1242 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1234 msg %= candidaterev
1243 msg %= candidaterev
1235 self._write_debug(msg)
1244 self._write_debug(msg)
1236 candidate_type = None
1245 candidate_type = None
1237 if candidaterev == p1:
1246 if candidaterev == p1:
1238 candidate_type = b"p1"
1247 candidate_type = b"p1"
1239 elif candidaterev == p2:
1248 elif candidaterev == p2:
1240 candidate_type = b"p2"
1249 candidate_type = b"p2"
1241 elif self.revlog.issnapshot(candidaterev):
1250 elif self.revlog.issnapshot(candidaterev):
1242 candidate_type = b"snapshot-%d"
1251 candidate_type = b"snapshot-%d"
1243 candidate_type %= self.revlog.snapshotdepth(
1252 candidate_type %= self.revlog.snapshotdepth(
1244 candidaterev
1253 candidaterev
1245 )
1254 )
1246
1255
1247 if candidate_type is not None:
1256 if candidate_type is not None:
1248 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1257 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1249 msg %= candidate_type
1258 msg %= candidate_type
1250 self._write_debug(msg)
1259 self._write_debug(msg)
1251 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1260 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1252 msg %= self.revlog.length(candidaterev)
1261 msg %= self.revlog.length(candidaterev)
1253 self._write_debug(msg)
1262 self._write_debug(msg)
1254 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1263 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1255 msg %= self.revlog.deltaparent(candidaterev)
1264 msg %= self.revlog.deltaparent(candidaterev)
1256 self._write_debug(msg)
1265 self._write_debug(msg)
1257
1266
1258 dbg_try_count += 1
1267 dbg_try_count += 1
1259
1268
1260 if debug_search:
1269 if debug_search:
1261 delta_start = util.timer()
1270 delta_start = util.timer()
1262 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1271 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1263 if debug_search:
1272 if debug_search:
1264 delta_end = util.timer()
1273 delta_end = util.timer()
1265 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1274 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1266 msg %= delta_end - delta_start
1275 msg %= delta_end - delta_start
1267 self._write_debug(msg)
1276 self._write_debug(msg)
1268 if candidatedelta is not None:
1277 if candidatedelta is not None:
1269 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1278 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1270 if debug_search:
1279 if debug_search:
1271 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1280 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1272 msg %= candidatedelta.deltalen
1281 msg %= candidatedelta.deltalen
1273 self._write_debug(msg)
1282 self._write_debug(msg)
1274 nominateddeltas.append(candidatedelta)
1283 nominateddeltas.append(candidatedelta)
1275 elif debug_search:
1284 elif debug_search:
1276 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1285 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1277 msg %= candidatedelta.deltalen
1286 msg %= candidatedelta.deltalen
1278 self._write_debug(msg)
1287 self._write_debug(msg)
1279 elif debug_search:
1288 elif debug_search:
1280 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1289 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1281 self._write_debug(msg)
1290 self._write_debug(msg)
1282 if nominateddeltas:
1291 if nominateddeltas:
1283 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1292 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1284 if deltainfo is not None:
1293 if deltainfo is not None:
1285 candidaterevs = groups.send(deltainfo.base)
1294 candidaterevs = groups.send(deltainfo.base)
1286 else:
1295 else:
1287 candidaterevs = next(groups)
1296 candidaterevs = next(groups)
1288
1297
1289 if deltainfo is None:
1298 if deltainfo is None:
1290 dbg_type = b"full"
1299 dbg_type = b"full"
1291 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1300 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1292 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1301 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1293 dbg_type = b"snapshot"
1302 dbg_type = b"snapshot"
1294 else:
1303 else:
1295 dbg_type = b"delta"
1304 dbg_type = b"delta"
1296
1305
1297 if gather_debug:
1306 if gather_debug:
1298 end = util.timer()
1307 end = util.timer()
1299 used_cached = (
1308 used_cached = (
1300 cachedelta is not None
1309 cachedelta is not None
1301 and dbg_try_rounds == 1
1310 and dbg_try_rounds == 1
1302 and dbg_try_count == 1
1311 and dbg_try_count == 1
1303 and deltainfo.base == cachedelta[0]
1312 and deltainfo.base == cachedelta[0]
1304 )
1313 )
1305 dbg = {
1314 dbg = {
1306 'duration': end - start,
1315 'duration': end - start,
1307 'revision': target_rev,
1316 'revision': target_rev,
1308 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1317 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1309 'search_round_count': dbg_try_rounds,
1318 'search_round_count': dbg_try_rounds,
1310 'using-cached-base': used_cached,
1319 'using-cached-base': used_cached,
1311 'delta_try_count': dbg_try_count,
1320 'delta_try_count': dbg_try_count,
1312 'type': dbg_type,
1321 'type': dbg_type,
1313 'p1-chain-len': p1_chain_len,
1322 'p1-chain-len': p1_chain_len,
1314 'p2-chain-len': p2_chain_len,
1323 'p2-chain-len': p2_chain_len,
1315 }
1324 }
1316 if (
1325 if (
1317 deltainfo.snapshotdepth # pytype: disable=attribute-error
1326 deltainfo.snapshotdepth # pytype: disable=attribute-error
1318 is not None
1327 is not None
1319 ):
1328 ):
1320 dbg[
1329 dbg[
1321 'snapshot-depth'
1330 'snapshot-depth'
1322 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1331 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1323 else:
1332 else:
1324 dbg['snapshot-depth'] = 0
1333 dbg['snapshot-depth'] = 0
1325 target_revlog = b"UNKNOWN"
1334 target_revlog = b"UNKNOWN"
1326 target_type = self.revlog.target[0]
1335 target_type = self.revlog.target[0]
1327 target_key = self.revlog.target[1]
1336 target_key = self.revlog.target[1]
1328 if target_type == KIND_CHANGELOG:
1337 if target_type == KIND_CHANGELOG:
1329 target_revlog = b'CHANGELOG:'
1338 target_revlog = b'CHANGELOG:'
1330 elif target_type == KIND_MANIFESTLOG:
1339 elif target_type == KIND_MANIFESTLOG:
1331 target_revlog = b'MANIFESTLOG:'
1340 target_revlog = b'MANIFESTLOG:'
1332 if target_key:
1341 if target_key:
1333 target_revlog += b'%s:' % target_key
1342 target_revlog += b'%s:' % target_key
1334 elif target_type == KIND_FILELOG:
1343 elif target_type == KIND_FILELOG:
1335 target_revlog = b'FILELOG:'
1344 target_revlog = b'FILELOG:'
1336 if target_key:
1345 if target_key:
1337 target_revlog += b'%s:' % target_key
1346 target_revlog += b'%s:' % target_key
1338 dbg['target-revlog'] = target_revlog
1347 dbg['target-revlog'] = target_revlog
1339
1348
1340 if self._debug_info is not None:
1349 if self._debug_info is not None:
1341 self._debug_info.append(dbg)
1350 self._debug_info.append(dbg)
1342
1351
1343 if self._write_debug is not None:
1352 if self._write_debug is not None:
1344 msg = (
1353 msg = (
1345 b"DBG-DELTAS:"
1354 b"DBG-DELTAS:"
1346 b" %-12s"
1355 b" %-12s"
1347 b" rev=%d:"
1356 b" rev=%d:"
1348 b" delta-base=%d"
1357 b" delta-base=%d"
1349 b" is-cached=%d"
1358 b" is-cached=%d"
1350 b" - search-rounds=%d"
1359 b" - search-rounds=%d"
1351 b" try-count=%d"
1360 b" try-count=%d"
1352 b" - delta-type=%-6s"
1361 b" - delta-type=%-6s"
1353 b" snap-depth=%d"
1362 b" snap-depth=%d"
1354 b" - p1-chain-length=%d"
1363 b" - p1-chain-length=%d"
1355 b" p2-chain-length=%d"
1364 b" p2-chain-length=%d"
1356 b" - duration=%f"
1365 b" - duration=%f"
1357 b"\n"
1366 b"\n"
1358 )
1367 )
1359 msg %= (
1368 msg %= (
1360 dbg["target-revlog"],
1369 dbg["target-revlog"],
1361 dbg["revision"],
1370 dbg["revision"],
1362 dbg["delta-base"],
1371 dbg["delta-base"],
1363 dbg["using-cached-base"],
1372 dbg["using-cached-base"],
1364 dbg["search_round_count"],
1373 dbg["search_round_count"],
1365 dbg["delta_try_count"],
1374 dbg["delta_try_count"],
1366 dbg["type"],
1375 dbg["type"],
1367 dbg["snapshot-depth"],
1376 dbg["snapshot-depth"],
1368 dbg["p1-chain-len"],
1377 dbg["p1-chain-len"],
1369 dbg["p2-chain-len"],
1378 dbg["p2-chain-len"],
1370 dbg["duration"],
1379 dbg["duration"],
1371 )
1380 )
1372 self._write_debug(msg)
1381 self._write_debug(msg)
1373 return deltainfo
1382 return deltainfo
1374
1383
1375
1384
1376 def delta_compression(default_compression_header, deltainfo):
1385 def delta_compression(default_compression_header, deltainfo):
1377 """return (COMPRESSION_MODE, deltainfo)
1386 """return (COMPRESSION_MODE, deltainfo)
1378
1387
1379 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1388 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1380 compression.
1389 compression.
1381 """
1390 """
1382 h, d = deltainfo.data
1391 h, d = deltainfo.data
1383 compression_mode = COMP_MODE_INLINE
1392 compression_mode = COMP_MODE_INLINE
1384 if not h and not d:
1393 if not h and not d:
1385 # not data to store at all... declare them uncompressed
1394 # not data to store at all... declare them uncompressed
1386 compression_mode = COMP_MODE_PLAIN
1395 compression_mode = COMP_MODE_PLAIN
1387 elif not h:
1396 elif not h:
1388 t = d[0:1]
1397 t = d[0:1]
1389 if t == b'\0':
1398 if t == b'\0':
1390 compression_mode = COMP_MODE_PLAIN
1399 compression_mode = COMP_MODE_PLAIN
1391 elif t == default_compression_header:
1400 elif t == default_compression_header:
1392 compression_mode = COMP_MODE_DEFAULT
1401 compression_mode = COMP_MODE_DEFAULT
1393 elif h == b'u':
1402 elif h == b'u':
1394 # we have a more efficient way to declare uncompressed
1403 # we have a more efficient way to declare uncompressed
1395 h = b''
1404 h = b''
1396 compression_mode = COMP_MODE_PLAIN
1405 compression_mode = COMP_MODE_PLAIN
1397 deltainfo = drop_u_compression(deltainfo)
1406 deltainfo = drop_u_compression(deltainfo)
1398 return compression_mode, deltainfo
1407 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now