##// END OF EJS Templates
delta-find: make sure we only use newer full snapshot as candidate...
marmoute -
r50575:40e24d82 default
parent child Browse files
Show More
@@ -1,1471 +1,1472 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
45 class _testrevlog:
45 class _testrevlog:
46 """minimalist fake revlog to use in doctests"""
46 """minimalist fake revlog to use in doctests"""
47
47
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 """data is an list of revision payload boundaries"""
49 """data is an list of revision payload boundaries"""
50 self._data = data
50 self._data = data
51 self._srdensitythreshold = density
51 self._srdensitythreshold = density
52 self._srmingapsize = mingap
52 self._srmingapsize = mingap
53 self._snapshot = set(snapshot)
53 self._snapshot = set(snapshot)
54 self.index = None
54 self.index = None
55
55
56 def start(self, rev):
56 def start(self, rev):
57 if rev == nullrev:
57 if rev == nullrev:
58 return 0
58 return 0
59 if rev == 0:
59 if rev == 0:
60 return 0
60 return 0
61 return self._data[rev - 1]
61 return self._data[rev - 1]
62
62
63 def end(self, rev):
63 def end(self, rev):
64 if rev == nullrev:
64 if rev == nullrev:
65 return 0
65 return 0
66 return self._data[rev]
66 return self._data[rev]
67
67
68 def length(self, rev):
68 def length(self, rev):
69 return self.end(rev) - self.start(rev)
69 return self.end(rev) - self.start(rev)
70
70
71 def __len__(self):
71 def __len__(self):
72 return len(self._data)
72 return len(self._data)
73
73
74 def issnapshot(self, rev):
74 def issnapshot(self, rev):
75 if rev == nullrev:
75 if rev == nullrev:
76 return True
76 return True
77 return rev in self._snapshot
77 return rev in self._snapshot
78
78
79
79
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no C implementation available on the index: fall back to the pure
        # Python density slicing defined in this module
        def densityslicing(x, y, z):
            return _slicechunktodensity(revlog, x, y, z)

    dense_chunks = densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    )
    for chunk in dense_chunks:
        # further split each density-approved chunk to honor `targetsize`
        yield from _slicechunktosize(revlog, chunk, targetsize)
152
152
153
153
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # whole span already fits within the limit: nothing to slice
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # the snapshot still fits in the current chunk: extend the chunk
            endrevidx = idx + 1
        else:
            # flush the current chunk and start a new one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # the remaining (non-snapshot) revisions are handled by the
            # binary slicing below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits the target
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains past the last cut point
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
307
307
308
308
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) is trivially dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # a span smaller than the minimal gap size cannot contain a gap worth
    # splitting on
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect all gaps between consecutive non-empty revisions; the list is
    # sorted below so that the largest gap can be popped off the end first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data that must be read,
        # which raises the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
433
433
434
434
435 def _trimchunk(revlog, revs, startidx, endidx=None):
435 def _trimchunk(revlog, revs, startidx, endidx=None):
436 """returns revs[startidx:endidx] without empty trailing revs
436 """returns revs[startidx:endidx] without empty trailing revs
437
437
438 Doctest Setup
438 Doctest Setup
439 >>> revlog = _testrevlog([
439 >>> revlog = _testrevlog([
440 ... 5, #0
440 ... 5, #0
441 ... 10, #1
441 ... 10, #1
442 ... 12, #2
442 ... 12, #2
443 ... 12, #3 (empty)
443 ... 12, #3 (empty)
444 ... 17, #4
444 ... 17, #4
445 ... 21, #5
445 ... 21, #5
446 ... 21, #6 (empty)
446 ... 21, #6 (empty)
447 ... ])
447 ... ])
448
448
449 Contiguous cases:
449 Contiguous cases:
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 [0, 1, 2, 3, 4, 5]
451 [0, 1, 2, 3, 4, 5]
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 [0, 1, 2, 3, 4]
453 [0, 1, 2, 3, 4]
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 [0, 1, 2]
455 [0, 1, 2]
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 [2]
457 [2]
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 [3, 4, 5]
459 [3, 4, 5]
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 [3, 4]
461 [3, 4]
462
462
463 Discontiguous cases:
463 Discontiguous cases:
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 [1, 3, 5]
465 [1, 3, 5]
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 [1]
467 [1]
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 [3, 5]
469 [3, 5]
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 [3, 5]
471 [3, 5]
472 """
472 """
473 length = revlog.length
473 length = revlog.length
474
474
475 if endidx is None:
475 if endidx is None:
476 endidx = len(revs)
476 endidx = len(revs)
477
477
478 # If we have a non-emtpy delta candidate, there are nothing to trim
478 # If we have a non-emtpy delta candidate, there are nothing to trim
479 if revs[endidx - 1] < len(revlog):
479 if revs[endidx - 1] < len(revlog):
480 # Trim empty revs at the end, except the very first revision of a chain
480 # Trim empty revs at the end, except the very first revision of a chain
481 while (
481 while (
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 ):
483 ):
484 endidx -= 1
484 endidx -= 1
485
485
486 return revs[startidx:endidx]
486 return revs[startidx:endidx]
487
487
488
488
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    first, last = revs[0], revs[-1]
    return revlog.end(last) - revlog.start(first)
517
517
518
518
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    `fh` is passed through to ``revlog.revision`` as its ``_df`` file handle;
    `p1`/`p2` are the parent nodes used for hash validation, `flags` the
    revision index flags, and `expectednode` the node the rebuilt text must
    hash to. Returns the full text; re-raises CensoredNodeError unless the
    censored flag is set.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # the delta is a full-replacement diff: its payload IS the full text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # the caller claimed this revision is censored, but hashing
            # succeeded: the claim is wrong
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
548
548
549
549
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable description of a candidate delta for a revision."""

    # distance from the base revision; bounds the amount of I/O needed
    distance = attr.ib()
    # size in bytes of the stored delta (drop_u_compression subtracts the
    # one-byte b'u' header from it)
    deltalen = attr.ib()
    # 2-tuple (header, compressed payload) holding the delta data
    data = attr.ib()
    # revision number used as delta base
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # cumulated size of the deltas that must be applied; bounds CPU usage
    compresseddeltalen = attr.ib()
    # snapshot depth, or None when the revision is not stored as a snapshot
    snapshotdepth = attr.ib()
560
560
561
561
def drop_u_compression(delta):
    """Strip the explicit b'u' (no-compression) header from a delta.

    Returns a new ``_deltainfo`` whose payload carries no compression
    header at all. This is useful for revlog formats that have a better
    compression method and do not need the marker byte.
    """
    assert delta.data[0] == b'u', delta.data[0]
    payload = delta.data[1]
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
578
578
579
579
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Tell whether ``deltainfo`` is acceptable for storing ``revinfo``.

    "Good" means the delta stays within the disk-span, size, and
    chain-length bounds that we know keep revlogs performant.

    Two quantities drive most checks:
    - ``deltainfo.distance``: distance from the base revision; bounding it
      limits the amount of I/O needed to restore the text.
    - ``deltainfo.compresseddeltalen``: total size of the deltas to apply;
      bounding it limits the CPU we consume.
    """
    if deltainfo is None:
        return False

    textlen = revinfo.textlen

    # Maximum span of data we accept reading; when the configuration knob
    # is unset, use the delta's own distance so that check always passes.
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance
    maxdist = max(maxdist, textlen * 4)

    # Bad delta from read span: the span of data read is larger than the
    # maximum allowed.
    #
    # In the sparse-revlog case, the associated "sparse reading" avoids
    # issues related to the span of data. In theory a pathological revlog
    # could be built where the delta pattern leads to too many reads, but
    # that does not happen in practice, so the span check is skipped
    # entirely there.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size: a delta larger than the target text
    # is an inefficient way to store it.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size: the sum of deltas grew larger
    # than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length: too many deltas in the chain.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # Bad delta from intermediate snapshot size limit: the limit exists to
    # prevent endless chains of intermediate deltas from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and (textlen >> depth) < deltainfo.deltalen:
        return False

    # Bad delta if the new intermediate snapshot would be larger than the
    # previous snapshot it builds upon.
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
651
651
652
652
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
657
657
658
658
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator driven through ``send()``: each yielded tuple is a
    group of candidate base revisions, and the value sent back is the best
    revision found so far (or None), which steers refinement. A final
    ``None`` is yielded when no more candidates remain.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    # nullrev is handled as a last resort, never as a group member
    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high
            # in the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            # a base much smaller than the text is unlikely to delta well
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    # stop batching as soon as a batch brings no improvement
                    if prev_good == good:
                        break

    yield None
800
800
801
801
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Yield candidate-base groups and refine them from caller feedback.

    Driven through ``send()``: the value sent back after each yield is the
    best base found so far (or None). In the sparse-revlog case a good
    snapshot base is further refined by walking its delta-parent chain
    ("down") and its snapshot children ("up"). Ends by yielding None.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    # XXX cache me higher
    snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: try the chosen snapshot's delta parents,
        # stopping when the answer no longer changes
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try snapshots built on top of the current best
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
865
865
866
866
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # only snapshots newer than every parent's level-0 snapshot matter
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        #
        # only full snapshots newer than the snapshot floor are candidates
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
986
987
987
988
class SnapshotCache:
    """Cache mapping each delta base to the snapshots built on top of it.

    ``snapshots`` maps base-rev -> set of snapshot revisions using it as a
    delta parent (full snapshots are keyed under ``nullrev``).
    ``_start_rev``/``_end_rev`` record the contiguous revision range already
    scanned, so repeated ``update()`` calls only scan new revisions.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # base-rev -> {snapshot revisions with that delta parent}
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the range already scanned (None = nothing yet)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        nb_revs = len(revlog)
        end_rev = nb_revs - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # the requested range is not fully covered; scan only the
            # uncovered portions on either side of the cached range
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        # _update only fills the cache content; the covered-range
        # bookkeeping is maintained here
        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # both bounds must lie entirely outside the already-covered range
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        # NOTE(review): attribute name is passed as bytes; this relies on
        # Mercurial's pycompat attribute-name handling — confirm against
        # util.safehasattr before changing.
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # fast path: the C/Rust index can enumerate snapshots itself
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1041
1042
1042
1043
class deltacomputer:
    """Compute deltas and pick a delta base for new revisions of a revlog."""

    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        # revlog we compute deltas for
        self.revlog = revlog
        # optional callable receiving debug byte-strings
        self._write_debug = write_debug
        # emit per-round search debug output (only if _write_debug is set)
        self._debug_search = debug_search
        # optional list accumulating per-revision debug dicts
        self._debug_info = debug_info

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # ``btext`` is a one-element list used as a mutable cache slot
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw delta of this revision against ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo for a delta against ``base``.

        Returns None when an upper-bound estimate already shows the delta
        cannot be acceptable (too large for its snapshot level or larger
        than its base).
        """
        # can we use the cached delta?
        revlog = self.revlog
        debug_search = self._write_debug is not None and self._debug_search
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, the base is always the previous chain base
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still apply
            currentbase = cachebase
            # skip over empty revisions: they do not change the fulltext,
            # so a delta against them also applies to their delta parent
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdept need to be neither None nor 0 level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            # best-case compressed size given the configured compression bound
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def _fullsnapshotinfo(self, fh, revinfo, curr):
        """Return a _deltainfo storing the full text as a depth-0 snapshot."""
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        # a full snapshot is its own base and starts a new chain
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revision that cannot be used as
        a delta base. Use this to recompute delta suitable in censor or strip
        context.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        if not revinfo.textlen:
            # empty revision: store a (trivial) full snapshot
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        if excluded_bases is None:
            excluded_bases = set()

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        gather_debug = (
            self._write_debug is not None or self._debug_info is not None
        )
        debug_search = self._write_debug is not None and self._debug_search

        if gather_debug:
            start = util.timer()

        # count the number of different delta we tried (for debug purpose)
        dbg_try_count = 0
        # count the number of "search round" we did. (for debug purpose)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        if gather_debug:
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # ``groups`` is a generator: ``next``/``send`` drive the refinement
        # rounds, feeding back the currently best base (see the send() below)
        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if debug_search:
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                # classify the round purely for debug output
                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1 in candidaterevs or p2 in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1:
                        candidate_type = b"p1"
                    elif candidaterev == p2:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                # keep the smallest of the current contender and new candidates
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            # no acceptable delta base found: fall back to a full snapshot
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            used_cached = (
                cachedelta is not None
                and dbg_try_rounds == 1
                and dbg_try_count == 1
                and deltainfo.base == cachedelta[0]
            )
            dbg = {
                'duration': end - start,
                'revision': target_rev,
                'delta-base': deltainfo.base,  # pytype: disable=attribute-error
                'search_round_count': dbg_try_rounds,
                'using-cached-base': used_cached,
                'delta_try_count': dbg_try_count,
                'type': dbg_type,
                'p1-chain-len': p1_chain_len,
                'p2-chain-len': p2_chain_len,
            }
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog

            if self._debug_info is not None:
                self._debug_info.append(dbg)

            if self._write_debug is not None:
                msg = (
                    b"DBG-DELTAS:"
                    b" %-12s"
                    b" rev=%d:"
                    b" delta-base=%d"
                    b" is-cached=%d"
                    b" - search-rounds=%d"
                    b" try-count=%d"
                    b" - delta-type=%-6s"
                    b" snap-depth=%d"
                    b" - p1-chain-length=%d"
                    b" p2-chain-length=%d"
                    b" - duration=%f"
                    b"\n"
                )
                msg %= (
                    dbg["target-revlog"],
                    dbg["revision"],
                    dbg["delta-base"],
                    dbg["using-cached-base"],
                    dbg["search_round_count"],
                    dbg["delta_try_count"],
                    dbg["type"],
                    dbg["snapshot-depth"],
                    dbg["p1-chain-len"],
                    dbg["p2-chain-len"],
                    dbg["duration"],
                )
                self._write_debug(msg)
        return deltainfo
1447
1448
1448
1449
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.

    default_compression_header: the one-byte header emitted by the revlog's
        default compression engine.
    deltainfo: the _deltainfo whose ``data`` (header, data) pair is examined;
        it may be replaced when the legacy b'u' marker is dropped.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare them uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            # presumably compressed by the default engine, so the leading
            # marker byte is redundant — TODO confirm against callers
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        # (note: the original also rebound ``h = b''`` here, but ``h`` is
        # never read afterwards, so the dead assignment was removed)
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now