##// END OF EJS Templates
bundle: when forcing acceptance of incoming delta also accept snapshot...
marmoute -
r50677:acdb9a15 default
parent child Browse files
Show More
@@ -1,1502 +1,1511 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_FORCE,
24 DELTA_BASE_REUSE_NO,
24 DELTA_BASE_REUSE_NO,
25 KIND_CHANGELOG,
25 KIND_CHANGELOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_MANIFESTLOG,
27 KIND_MANIFESTLOG,
28 REVIDX_ISCENSORED,
28 REVIDX_ISCENSORED,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 )
30 )
31
31
32 from ..thirdparty import attr
32 from ..thirdparty import attr
33
33
34 from .. import (
34 from .. import (
35 error,
35 error,
36 mdiff,
36 mdiff,
37 util,
37 util,
38 )
38 )
39
39
40 from . import flagutil
40 from . import flagutil
41
41
42 # maximum <delta-chain-data>/<revision-text-length> ratio
42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 LIMIT_DELTA2TEXT = 2
43 LIMIT_DELTA2TEXT = 2
44
44
45
45
46 class _testrevlog:
46 class _testrevlog:
47 """minimalist fake revlog to use in doctests"""
47 """minimalist fake revlog to use in doctests"""
48
48
49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 """data is an list of revision payload boundaries"""
50 """data is an list of revision payload boundaries"""
51 self._data = data
51 self._data = data
52 self._srdensitythreshold = density
52 self._srdensitythreshold = density
53 self._srmingapsize = mingap
53 self._srmingapsize = mingap
54 self._snapshot = set(snapshot)
54 self._snapshot = set(snapshot)
55 self.index = None
55 self.index = None
56
56
57 def start(self, rev):
57 def start(self, rev):
58 if rev == nullrev:
58 if rev == nullrev:
59 return 0
59 return 0
60 if rev == 0:
60 if rev == 0:
61 return 0
61 return 0
62 return self._data[rev - 1]
62 return self._data[rev - 1]
63
63
64 def end(self, rev):
64 def end(self, rev):
65 if rev == nullrev:
65 if rev == nullrev:
66 return 0
66 return 0
67 return self._data[rev]
67 return self._data[rev]
68
68
69 def length(self, rev):
69 def length(self, rev):
70 return self.end(rev) - self.start(rev)
70 return self.end(rev) - self.start(rev)
71
71
72 def __len__(self):
72 def __len__(self):
73 return len(self._data)
73 return len(self._data)
74
74
75 def issnapshot(self, rev):
75 def issnapshot(self, rev):
76 if rev == nullrev:
76 if rev == nullrev:
77 return True
77 return True
78 return rev in self._snapshot
78 return rev in self._snapshot
79
79
80
80
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never slice below the minimal gap size used by density slicing
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    density_fn = getattr(revlog.index, 'slicechunktodensity', None)
    if density_fn is None:
        # no native (C/Rust) implementation available: fall back to the pure
        # Python density slicer defined in this module
        def density_fn(r, d, m):
            return _slicechunktodensity(revlog, r, d, m)

    threshold = revlog._srdensitythreshold
    mingap = revlog._srmingapsize
    for chunk in density_fn(revs, threshold, mingap):
        # further split density-selected chunks that exceed the read target
        yield from _slicechunktosize(revlog, chunk, targetsize)
153
153
154
154
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole span already fits within one read of `targetsize`: no slicing
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # this snapshot still fits in the current chunk: extend the chunk
            endrevidx = idx + 1
        else:
            # chunk would grow past `targetsize` (or `r` is not a snapshot):
            # emit what was gathered so far and restart from this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # remaining revisions are handled by the binary slicing below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate range until its byte span fits `targetsize`
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
308
308
309
309
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision is always dense enough on its own
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # span smaller than the minimal gap: not worth slicing at all
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions as
    # (size, index-after-gap) pairs
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps ascending, so list.pop() below yields the largest first
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read, which
        # raises the density of what remains
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last selected cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
434
434
435
435
436 def _trimchunk(revlog, revs, startidx, endidx=None):
436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 """returns revs[startidx:endidx] without empty trailing revs
437 """returns revs[startidx:endidx] without empty trailing revs
438
438
439 Doctest Setup
439 Doctest Setup
440 >>> revlog = _testrevlog([
440 >>> revlog = _testrevlog([
441 ... 5, #0
441 ... 5, #0
442 ... 10, #1
442 ... 10, #1
443 ... 12, #2
443 ... 12, #2
444 ... 12, #3 (empty)
444 ... 12, #3 (empty)
445 ... 17, #4
445 ... 17, #4
446 ... 21, #5
446 ... 21, #5
447 ... 21, #6 (empty)
447 ... 21, #6 (empty)
448 ... ])
448 ... ])
449
449
450 Contiguous cases:
450 Contiguous cases:
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 [0, 1, 2, 3, 4, 5]
452 [0, 1, 2, 3, 4, 5]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 [0, 1, 2, 3, 4]
454 [0, 1, 2, 3, 4]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 [0, 1, 2]
456 [0, 1, 2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 [2]
458 [2]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 [3, 4, 5]
460 [3, 4, 5]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 [3, 4]
462 [3, 4]
463
463
464 Discontiguous cases:
464 Discontiguous cases:
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 [1, 3, 5]
466 [1, 3, 5]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 [1]
468 [1]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 [3, 5]
470 [3, 5]
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 [3, 5]
472 [3, 5]
473 """
473 """
474 length = revlog.length
474 length = revlog.length
475
475
476 if endidx is None:
476 if endidx is None:
477 endidx = len(revs)
477 endidx = len(revs)
478
478
479 # If we have a non-emtpy delta candidate, there are nothing to trim
479 # If we have a non-emtpy delta candidate, there are nothing to trim
480 if revs[endidx - 1] < len(revlog):
480 if revs[endidx - 1] < len(revlog):
481 # Trim empty revs at the end, except the very first revision of a chain
481 # Trim empty revs at the end, except the very first revision of a chain
482 while (
482 while (
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 ):
484 ):
485 endidx -= 1
485 endidx -= 1
486
486
487 return revs[startidx:endidx]
487 return revs[startidx:endidx]
488
488
489
489
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # distance from the start of the first rev to the end of the last one
    return revlog.end(revs[-1]) - revlog.start(revs[0])
518
518
519
519
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    fullreplace = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    )
    if delta[:hlen] == fullreplace:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        # a censored flag without a CensoredNodeError means the content does
        # not actually match censorship expectations
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
549
549
550
550
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable record describing a candidate delta for a revision."""

    # distance from base revision (bounds the amount of I/O needed)
    distance = attr.ib()
    # length of the delta itself
    deltalen = attr.ib()
    # (header, compressed-data) pair for the delta
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # first revision of the resulting delta chain
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated compressed size of the whole chain
    compresseddeltalen = attr.ib()
    # snapshot depth if this delta creates an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
561
561
562
562
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.

    The returned ``_deltainfo`` is identical except the leading b'u' marker
    byte is dropped from the data header and the delta length is reduced by
    one accordingly.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
579
579
580
580
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # the higher-level instruction to reuse this exact base overrides every
    # heuristic below
    if (
        revinfo.cachedelta is not None
        and deltainfo.base == revinfo.cachedelta[0]
        and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        return True

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
659
659
660
660
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
665
665
666
666
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator driven through ``send()``: the caller sends back the
    revision it found "good" (or None) after testing each emitted group.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if (
        cachedelta is not None
        and nullrev == cachedelta[0]
        and cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        # instructions are to forcibly do a full snapshot
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            if (
                cachedelta is not None
                and rev == cachedelta[0]
                and cachedelta[2] == DELTA_BASE_REUSE_FORCE
            ):
                # instructions are to forcibly consider/use this delta base
                group.append(rev)
                continue

            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        break

    yield None
820
829
821
830
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Yield candidate-base groups, refining around an accepted snapshot.

    Driven through ``send()`` like ``_candidategroups``: the caller sends
    back the revision it accepted (or None) after testing each group.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
885
894
886
895
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: ``sparse`` implies ``gdelta``, so ``parents`` is always bound here
    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
1007
1016
1008
1017
class SnapshotCache:
    """Cache of which snapshot revisions are built on which bases.

    ``snapshots`` maps a base revision (``nullrev`` for full snapshots) to
    the set of snapshot revisions delta'ed against it.  The cache remembers
    the inclusive revision interval it has already scanned so later calls
    to ``update`` only walk the portions of the revlog not seen yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # base revision -> set of snapshot revisions using that base
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the interval already scanned (None: nothing yet)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        tip = len(revlog) - 1
        if start_rev > tip:
            return  # range is empty

        if self._start_rev is None:
            # first scan ever: walk the whole requested range
            assert self._end_rev is None
            self._update(revlog, start_rev, tip)
        else:
            covered = self._start_rev <= start_rev and tip <= self._end_rev
            if not covered:
                # only walk the parts sticking out of the known interval
                if start_rev < self._start_rev:
                    self._update(revlog, start_rev, self._start_rev - 1)
                if self._end_rev < tip:
                    self._update(revlog, self._end_rev + 1, tip)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = tip
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, tip)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # callers must never re-scan revisions inside the known interval
        if self._start_rev is not None:
            start_outside = (
                start_rev < self._start_rev or start_rev > self._end_rev
            )
            assert start_outside, (
                self._start_rev,
                self._end_rev,
                start_rev,
                end_rev,
            )
            end_outside = end_rev < self._start_rev or end_rev > self._end_rev
            assert end_outside, (
                self._start_rev,
                self._end_rev,
                start_rev,
                end_rev,
            )
        cache = self.snapshots
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # fast path: let the native index gather snapshots itself
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1071
1063
1072
class deltacomputer:
    """Object in charge of computing deltas (or full snapshots) to store.

    Wraps a revlog and keeps a ``SnapshotCache`` alive across calls to
    ``finddeltainfo`` so that storing many revisions in a row does not
    re-scan the revlog for snapshots each time.  Optional debug hooks
    record or print information about the delta search.
    """

    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        # the revlog we compute deltas for
        self.revlog = revlog
        # optional callable receiving debug messages (bytes)
        self._write_debug = write_debug
        # when True (and write_debug is set) emit a detailed search trace
        self._debug_search = debug_search
        # optional list collecting one debug dict per processed revision
        self._debug_info = debug_info
        # snapshot cache shared by all finddeltainfo calls on this object
        self._snapshot_cache = SnapshotCache()

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # btext is a one-element mutable cache; reuse a previous build
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        # expand the cached delta into a fulltext and memoize it in btext
        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw (uncompressed) delta of revinfo against `base`."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing revinfo delta'ed against `base`.

        Returns None when the delta is discarded early because its
        projected compressed size is too large for an intermediate
        snapshot at this depth.
        """
        # can we use the cached delta?
        revlog = self.revlog
        debug_search = self._write_debug is not None and self._debug_search
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, the base is always the previous
            # revision of the chain
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still apply
            # walk up empty (zero-length) revisions: they store no delta,
            # so a delta against them is really a delta against their base
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdepth needs to be neither None nor 0: this early
        # filtering only applies to intermediate snapshots (depth >= 1)
        if revlog.upperboundcomp is not None and snapshotdepth:
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                # even optimally compressed, this snapshot would be too big
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        # distance from the start of the delta chain base to the end of
        # this new delta, once appended
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def _fullsnapshotinfo(self, fh, revinfo, curr):
        """Build a _deltainfo storing the revision as a full snapshot.

        `curr` is the revision number the entry will be stored as; a full
        snapshot is its own delta and chain base (depth 0, chain length 1).
        """
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revision that cannot be used as
        a delta base. Use this to recompute delta suitable in censor or strip
        context.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        # empty text is always stored as a full snapshot
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        if excluded_bases is None:
            excluded_bases = set()

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        gather_debug = (
            self._write_debug is not None or self._debug_info is not None
        )
        debug_search = self._write_debug is not None and self._debug_search

        if gather_debug:
            start = util.timer()

        # count the number of different delta we tried (for debug purpose)
        dbg_try_count = 0
        # count the number of "search round" we did. (for debug purpose)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        if gather_debug:
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # `groups` is a generator of candidate-base groups; after each
        # round we `send` it the best base found so far so it can refine
        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if debug_search:
                # classify this round for the debug trace only
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1 in candidaterevs or p2 in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1:
                        candidate_type = b"p1"
                    elif candidaterev == p2:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                # keep the smallest delta found this round
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg = {
                'duration': end - start,
                'revision': target_rev,
                'delta-base': deltainfo.base,  # pytype: disable=attribute-error
                'search_round_count': dbg_try_rounds,
                'using-cached-base': used_cached,
                'delta_try_count': dbg_try_count,
                'type': dbg_type,
                'p1-chain-len': p1_chain_len,
                'p2-chain-len': p2_chain_len,
            }
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog

            if self._debug_info is not None:
                self._debug_info.append(dbg)

            if self._write_debug is not None:
                msg = (
                    b"DBG-DELTAS:"
                    b" %-12s"
                    b" rev=%d:"
                    b" delta-base=%d"
                    b" is-cached=%d"
                    b" - search-rounds=%d"
                    b" try-count=%d"
                    b" - delta-type=%-6s"
                    b" snap-depth=%d"
                    b" - p1-chain-length=%d"
                    b" p2-chain-length=%d"
                    b" - duration=%f"
                    b"\n"
                )
                msg %= (
                    dbg["target-revlog"],
                    dbg["revision"],
                    dbg["delta-base"],
                    dbg["using-cached-base"],
                    dbg["search_round_count"],
                    dbg["delta_try_count"],
                    dbg["type"],
                    dbg["snapshot-depth"],
                    dbg["p1-chain-len"],
                    dbg["p2-chain-len"],
                    dbg["duration"],
                )
                self._write_debug(msg)
        return deltainfo
1486 return deltainfo
1478
1487
1479
1488
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    if not header and not payload:
        # not data to store at all... declare them uncompressed
        return COMP_MODE_PLAIN, deltainfo
    if not header:
        # no compression header: peek at the first payload byte to
        # recognize how the data was (or was not) compressed
        marker = payload[0:1]
        if marker == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if marker == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # we have a more efficient way to declare uncompressed
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
1511 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now