##// END OF EJS Templates
delta-find: never do anything fancy when general delta is off...
marmoute -
r51331:02325712 stable
parent child Browse files
Show More
@@ -0,0 +1,196 b''
1 ============================================================================
2 Pulling from modern to a non-general delta target (and other related checks)
3 ============================================================================
4
5 There is various issue that can arise when we update the code with modern
6 storage in mind while working on delta processing. So this file is meant for
7 various scenario that might break in the future or have break in the past.
8
9 Setup
10 =====
11
12 Create a modern server with an older clone
13
14 $ cat << EOF >> $HGRCPATH
15 > [command-templates]
16 > log = "{desc} {tags}\n"
17 > EOF
18
19 $ hg init server
20
21 $ hg clone --quiet --pull server client --config format.usegeneraldelta=no
22 $ hg debugformat -R client | grep generaldelta
23 generaldelta: no
24
25 Create some complexe history
26
27 $ cd server
28 $ hg debugbuilddag -n '.+3:a$.+5:b/a:k$.+7:c/b:l$.+6:d/a:m<k+6/l+1/m'
29 $ hg log -G
30 o r36 tip
31 |\
32 | o r35
33 | |
34 | o r34
35 | |\
36 | | o r33
37 | | |
38 | | o r32
39 | | |
40 | | o r31
41 | | |
42 | | o r30
43 | | |
44 | | o r29
45 | | |
46 | | o r28
47 | | |
48 o | | r27 m
49 |\ \ \
50 | o | | r26 d
51 | | | |
52 | o | | r25
53 | | | |
54 | o | | r24
55 | | | |
56 | o | | r23
57 | | | |
58 | o | | r22
59 | | | |
60 | o | | r21
61 | | | |
62 | o | | r20
63 | / /
64 | o | r19 l
65 | |\ \
66 | | o | r18 c
67 | | | |
68 | | o | r17
69 | | | |
70 | | o | r16
71 | | | |
72 | | o | r15
73 | | | |
74 | | o | r14
75 | | | |
76 | | o | r13
77 | | | |
78 | | o | r12
79 | | | |
80 | | o | r11
81 | | /
82 +---o r10 k
83 | |/
84 | o r9 b
85 | |
86 | o r8
87 | |
88 | o r7
89 | |
90 | o r6
91 | |
92 | o r5
93 | |
94 | o r4
95 |
96 o r3 a
97 |
98 o r2
99 |
100 o r1
101 |
102 o r0
103
104 $ cd ..
105
106
107 Pull it in the client
108 =====================
109
110
111 pull with default value
112 -----------------------
113
114 $ cp -R client client-simple-pull
115 $ hg -R client-simple-pull pull
116 pulling from $TESTTMP/server
117 requesting all changes
118 adding changesets
119 adding manifests
120 adding file changes
121 added 37 changesets with 37 changes to 37 files
122 new changesets 61246295ee1e:b4b117cbbcf3
123 (run 'hg update' to get a working copy)
124 $ hg -R client-simple-pull verify
125 checking changesets
126 checking manifests
127 crosschecking files in changesets and manifests
128 checking files
129 checking dirstate
130 checked 37 changesets with 37 changes to 37 files
131
132
133 pull with "no-reuse" policy
134 ---------------------------
135
136 $ cp -R client client-no-reuse
137 $ hg -R client-no-reuse pull --config paths.default:pulled-delta-reuse-policy=no-reuse
138 pulling from $TESTTMP/server
139 requesting all changes
140 adding changesets
141 adding manifests
142 adding file changes
143 added 37 changesets with 37 changes to 37 files
144 new changesets 61246295ee1e:b4b117cbbcf3
145 (run 'hg update' to get a working copy)
146 $ hg -R client-no-reuse verify
147 checking changesets
148 checking manifests
149 crosschecking files in changesets and manifests
150 checking files
151 checking dirstate
152 checked 37 changesets with 37 changes to 37 files
153
154
155 pull with "try-base" policy
156 ---------------------------
157
158 $ cp -R client client-try-base
159 $ hg -R client-try-base pull --config paths.default:pulled-delta-reuse-policy=try-base
160 pulling from $TESTTMP/server
161 requesting all changes
162 adding changesets
163 adding manifests
164 adding file changes
165 added 37 changesets with 37 changes to 37 files
166 new changesets 61246295ee1e:b4b117cbbcf3
167 (run 'hg update' to get a working copy)
168 $ hg -R client-try-base verify
169 checking changesets
170 checking manifests
171 crosschecking files in changesets and manifests
172 checking files
173 checking dirstate
174 checked 37 changesets with 37 changes to 37 files
175
176
177 pull with "forced" policy
178 -------------------------
179
180 $ cp -R client client-forced
181 $ hg -R client-forced pull --config paths.default:pulled-delta-reuse-policy=forced
182 pulling from $TESTTMP/server
183 requesting all changes
184 adding changesets
185 adding manifests
186 adding file changes
187 added 37 changesets with 37 changes to 37 files
188 new changesets 61246295ee1e:b4b117cbbcf3
189 (run 'hg update' to get a working copy)
190 $ hg -R client-forced verify
191 checking changesets
192 checking manifests
193 crosschecking files in changesets and manifests
194 checking files
195 checking dirstate
196 checked 37 changesets with 37 changes to 37 files
@@ -1,1514 +1,1520 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_FORCE,
24 DELTA_BASE_REUSE_NO,
24 DELTA_BASE_REUSE_NO,
25 KIND_CHANGELOG,
25 KIND_CHANGELOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_MANIFESTLOG,
27 KIND_MANIFESTLOG,
28 REVIDX_ISCENSORED,
28 REVIDX_ISCENSORED,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 )
30 )
31
31
32 from ..thirdparty import attr
32 from ..thirdparty import attr
33
33
34 from .. import (
34 from .. import (
35 error,
35 error,
36 mdiff,
36 mdiff,
37 util,
37 util,
38 )
38 )
39
39
40 from . import flagutil
40 from . import flagutil
41
41
42 # maximum <delta-chain-data>/<revision-text-length> ratio
42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 LIMIT_DELTA2TEXT = 2
43 LIMIT_DELTA2TEXT = 2
44
44
45
45
46 class _testrevlog:
46 class _testrevlog:
47 """minimalist fake revlog to use in doctests"""
47 """minimalist fake revlog to use in doctests"""
48
48
49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 """data is an list of revision payload boundaries"""
50 """data is an list of revision payload boundaries"""
51 self._data = data
51 self._data = data
52 self._srdensitythreshold = density
52 self._srdensitythreshold = density
53 self._srmingapsize = mingap
53 self._srmingapsize = mingap
54 self._snapshot = set(snapshot)
54 self._snapshot = set(snapshot)
55 self.index = None
55 self.index = None
56
56
57 def start(self, rev):
57 def start(self, rev):
58 if rev == nullrev:
58 if rev == nullrev:
59 return 0
59 return 0
60 if rev == 0:
60 if rev == 0:
61 return 0
61 return 0
62 return self._data[rev - 1]
62 return self._data[rev - 1]
63
63
64 def end(self, rev):
64 def end(self, rev):
65 if rev == nullrev:
65 if rev == nullrev:
66 return 0
66 return 0
67 return self._data[rev]
67 return self._data[rev]
68
68
69 def length(self, rev):
69 def length(self, rev):
70 return self.end(rev) - self.start(rev)
70 return self.end(rev) - self.start(rev)
71
71
72 def __len__(self):
72 def __len__(self):
73 return len(self._data)
73 return len(self._data)
74
74
75 def issnapshot(self, rev):
75 def issnapshot(self, rev):
76 if rev == nullrev:
76 if rev == nullrev:
77 return True
77 return True
78 return rev in self._snapshot
78 return rev in self._snapshot
79
79
80
80
81 def slicechunk(revlog, revs, targetsize=None):
81 def slicechunk(revlog, revs, targetsize=None):
82 """slice revs to reduce the amount of unrelated data to be read from disk.
82 """slice revs to reduce the amount of unrelated data to be read from disk.
83
83
84 ``revs`` is sliced into groups that should be read in one time.
84 ``revs`` is sliced into groups that should be read in one time.
85 Assume that revs are sorted.
85 Assume that revs are sorted.
86
86
87 The initial chunk is sliced until the overall density (payload/chunks-span
87 The initial chunk is sliced until the overall density (payload/chunks-span
88 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
88 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
89 `revlog._srmingapsize` is skipped.
89 `revlog._srmingapsize` is skipped.
90
90
91 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
91 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
92 For consistency with other slicing choice, this limit won't go lower than
92 For consistency with other slicing choice, this limit won't go lower than
93 `revlog._srmingapsize`.
93 `revlog._srmingapsize`.
94
94
95 If individual revisions chunk are larger than this limit, they will still
95 If individual revisions chunk are larger than this limit, they will still
96 be raised individually.
96 be raised individually.
97
97
98 >>> data = [
98 >>> data = [
99 ... 5, #00 (5)
99 ... 5, #00 (5)
100 ... 10, #01 (5)
100 ... 10, #01 (5)
101 ... 12, #02 (2)
101 ... 12, #02 (2)
102 ... 12, #03 (empty)
102 ... 12, #03 (empty)
103 ... 27, #04 (15)
103 ... 27, #04 (15)
104 ... 31, #05 (4)
104 ... 31, #05 (4)
105 ... 31, #06 (empty)
105 ... 31, #06 (empty)
106 ... 42, #07 (11)
106 ... 42, #07 (11)
107 ... 47, #08 (5)
107 ... 47, #08 (5)
108 ... 47, #09 (empty)
108 ... 47, #09 (empty)
109 ... 48, #10 (1)
109 ... 48, #10 (1)
110 ... 51, #11 (3)
110 ... 51, #11 (3)
111 ... 74, #12 (23)
111 ... 74, #12 (23)
112 ... 85, #13 (11)
112 ... 85, #13 (11)
113 ... 86, #14 (1)
113 ... 86, #14 (1)
114 ... 91, #15 (5)
114 ... 91, #15 (5)
115 ... ]
115 ... ]
116 >>> revlog = _testrevlog(data, snapshot=range(16))
116 >>> revlog = _testrevlog(data, snapshot=range(16))
117
117
118 >>> list(slicechunk(revlog, list(range(16))))
118 >>> list(slicechunk(revlog, list(range(16))))
119 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
119 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
120 >>> list(slicechunk(revlog, [0, 15]))
120 >>> list(slicechunk(revlog, [0, 15]))
121 [[0], [15]]
121 [[0], [15]]
122 >>> list(slicechunk(revlog, [0, 11, 15]))
122 >>> list(slicechunk(revlog, [0, 11, 15]))
123 [[0], [11], [15]]
123 [[0], [11], [15]]
124 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
124 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
125 [[0], [11, 13, 15]]
125 [[0], [11, 13, 15]]
126 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
126 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
127 [[1, 2], [5, 8, 10, 11], [14]]
127 [[1, 2], [5, 8, 10, 11], [14]]
128
128
129 Slicing with a maximum chunk size
129 Slicing with a maximum chunk size
130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
131 [[0], [11], [13], [15]]
131 [[0], [11], [13], [15]]
132 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
132 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
133 [[0], [11], [13, 15]]
133 [[0], [11], [13, 15]]
134
134
135 Slicing involving nullrev
135 Slicing involving nullrev
136 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
136 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
137 [[-1, 0], [11], [13, 15]]
137 [[-1, 0], [11], [13, 15]]
138 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
138 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
139 [[-1], [13], [15]]
139 [[-1], [13], [15]]
140 """
140 """
141 if targetsize is not None:
141 if targetsize is not None:
142 targetsize = max(targetsize, revlog._srmingapsize)
142 targetsize = max(targetsize, revlog._srmingapsize)
143 # targetsize should not be specified when evaluating delta candidates:
143 # targetsize should not be specified when evaluating delta candidates:
144 # * targetsize is used to ensure we stay within specification when reading,
144 # * targetsize is used to ensure we stay within specification when reading,
145 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
145 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
146 if densityslicing is None:
146 if densityslicing is None:
147 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
147 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
148 for chunk in densityslicing(
148 for chunk in densityslicing(
149 revs, revlog._srdensitythreshold, revlog._srmingapsize
149 revs, revlog._srdensitythreshold, revlog._srmingapsize
150 ):
150 ):
151 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
151 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
152 yield subchunk
152 yield subchunk
153
153
154
154
155 def _slicechunktosize(revlog, revs, targetsize=None):
155 def _slicechunktosize(revlog, revs, targetsize=None):
156 """slice revs to match the target size
156 """slice revs to match the target size
157
157
158 This is intended to be used on chunk that density slicing selected by that
158 This is intended to be used on chunk that density slicing selected by that
159 are still too large compared to the read garantee of revlog. This might
159 are still too large compared to the read garantee of revlog. This might
160 happens when "minimal gap size" interrupted the slicing or when chain are
160 happens when "minimal gap size" interrupted the slicing or when chain are
161 built in a way that create large blocks next to each other.
161 built in a way that create large blocks next to each other.
162
162
163 >>> data = [
163 >>> data = [
164 ... 3, #0 (3)
164 ... 3, #0 (3)
165 ... 5, #1 (2)
165 ... 5, #1 (2)
166 ... 6, #2 (1)
166 ... 6, #2 (1)
167 ... 8, #3 (2)
167 ... 8, #3 (2)
168 ... 8, #4 (empty)
168 ... 8, #4 (empty)
169 ... 11, #5 (3)
169 ... 11, #5 (3)
170 ... 12, #6 (1)
170 ... 12, #6 (1)
171 ... 13, #7 (1)
171 ... 13, #7 (1)
172 ... 14, #8 (1)
172 ... 14, #8 (1)
173 ... ]
173 ... ]
174
174
175 == All snapshots cases ==
175 == All snapshots cases ==
176 >>> revlog = _testrevlog(data, snapshot=range(9))
176 >>> revlog = _testrevlog(data, snapshot=range(9))
177
177
178 Cases where chunk is already small enough
178 Cases where chunk is already small enough
179 >>> list(_slicechunktosize(revlog, [0], 3))
179 >>> list(_slicechunktosize(revlog, [0], 3))
180 [[0]]
180 [[0]]
181 >>> list(_slicechunktosize(revlog, [6, 7], 3))
181 >>> list(_slicechunktosize(revlog, [6, 7], 3))
182 [[6, 7]]
182 [[6, 7]]
183 >>> list(_slicechunktosize(revlog, [0], None))
183 >>> list(_slicechunktosize(revlog, [0], None))
184 [[0]]
184 [[0]]
185 >>> list(_slicechunktosize(revlog, [6, 7], None))
185 >>> list(_slicechunktosize(revlog, [6, 7], None))
186 [[6, 7]]
186 [[6, 7]]
187
187
188 cases where we need actual slicing
188 cases where we need actual slicing
189 >>> list(_slicechunktosize(revlog, [0, 1], 3))
189 >>> list(_slicechunktosize(revlog, [0, 1], 3))
190 [[0], [1]]
190 [[0], [1]]
191 >>> list(_slicechunktosize(revlog, [1, 3], 3))
191 >>> list(_slicechunktosize(revlog, [1, 3], 3))
192 [[1], [3]]
192 [[1], [3]]
193 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
193 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
194 [[1, 2], [3]]
194 [[1, 2], [3]]
195 >>> list(_slicechunktosize(revlog, [3, 5], 3))
195 >>> list(_slicechunktosize(revlog, [3, 5], 3))
196 [[3], [5]]
196 [[3], [5]]
197 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
197 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
198 [[3], [5]]
198 [[3], [5]]
199 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
199 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
200 [[5], [6, 7, 8]]
200 [[5], [6, 7, 8]]
201 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
201 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
202 [[0], [1, 2], [3], [5], [6, 7, 8]]
202 [[0], [1, 2], [3], [5], [6, 7, 8]]
203
203
204 Case with too large individual chunk (must return valid chunk)
204 Case with too large individual chunk (must return valid chunk)
205 >>> list(_slicechunktosize(revlog, [0, 1], 2))
205 >>> list(_slicechunktosize(revlog, [0, 1], 2))
206 [[0], [1]]
206 [[0], [1]]
207 >>> list(_slicechunktosize(revlog, [1, 3], 1))
207 >>> list(_slicechunktosize(revlog, [1, 3], 1))
208 [[1], [3]]
208 [[1], [3]]
209 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
209 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
210 [[3], [5]]
210 [[3], [5]]
211
211
212 == No Snapshot cases ==
212 == No Snapshot cases ==
213 >>> revlog = _testrevlog(data)
213 >>> revlog = _testrevlog(data)
214
214
215 Cases where chunk is already small enough
215 Cases where chunk is already small enough
216 >>> list(_slicechunktosize(revlog, [0], 3))
216 >>> list(_slicechunktosize(revlog, [0], 3))
217 [[0]]
217 [[0]]
218 >>> list(_slicechunktosize(revlog, [6, 7], 3))
218 >>> list(_slicechunktosize(revlog, [6, 7], 3))
219 [[6, 7]]
219 [[6, 7]]
220 >>> list(_slicechunktosize(revlog, [0], None))
220 >>> list(_slicechunktosize(revlog, [0], None))
221 [[0]]
221 [[0]]
222 >>> list(_slicechunktosize(revlog, [6, 7], None))
222 >>> list(_slicechunktosize(revlog, [6, 7], None))
223 [[6, 7]]
223 [[6, 7]]
224
224
225 cases where we need actual slicing
225 cases where we need actual slicing
226 >>> list(_slicechunktosize(revlog, [0, 1], 3))
226 >>> list(_slicechunktosize(revlog, [0, 1], 3))
227 [[0], [1]]
227 [[0], [1]]
228 >>> list(_slicechunktosize(revlog, [1, 3], 3))
228 >>> list(_slicechunktosize(revlog, [1, 3], 3))
229 [[1], [3]]
229 [[1], [3]]
230 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
230 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
231 [[1], [2, 3]]
231 [[1], [2, 3]]
232 >>> list(_slicechunktosize(revlog, [3, 5], 3))
232 >>> list(_slicechunktosize(revlog, [3, 5], 3))
233 [[3], [5]]
233 [[3], [5]]
234 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
234 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
235 [[3], [4, 5]]
235 [[3], [4, 5]]
236 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
236 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
237 [[5], [6, 7, 8]]
237 [[5], [6, 7, 8]]
238 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
238 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
239 [[0], [1, 2], [3], [5], [6, 7, 8]]
239 [[0], [1, 2], [3], [5], [6, 7, 8]]
240
240
241 Case with too large individual chunk (must return valid chunk)
241 Case with too large individual chunk (must return valid chunk)
242 >>> list(_slicechunktosize(revlog, [0, 1], 2))
242 >>> list(_slicechunktosize(revlog, [0, 1], 2))
243 [[0], [1]]
243 [[0], [1]]
244 >>> list(_slicechunktosize(revlog, [1, 3], 1))
244 >>> list(_slicechunktosize(revlog, [1, 3], 1))
245 [[1], [3]]
245 [[1], [3]]
246 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
246 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
247 [[3], [5]]
247 [[3], [5]]
248
248
249 == mixed case ==
249 == mixed case ==
250 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
250 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
251 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
251 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
252 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
252 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
253 """
253 """
254 assert targetsize is None or 0 <= targetsize
254 assert targetsize is None or 0 <= targetsize
255 startdata = revlog.start(revs[0])
255 startdata = revlog.start(revs[0])
256 enddata = revlog.end(revs[-1])
256 enddata = revlog.end(revs[-1])
257 fullspan = enddata - startdata
257 fullspan = enddata - startdata
258 if targetsize is None or fullspan <= targetsize:
258 if targetsize is None or fullspan <= targetsize:
259 yield revs
259 yield revs
260 return
260 return
261
261
262 startrevidx = 0
262 startrevidx = 0
263 endrevidx = 1
263 endrevidx = 1
264 iterrevs = enumerate(revs)
264 iterrevs = enumerate(revs)
265 next(iterrevs) # skip first rev.
265 next(iterrevs) # skip first rev.
266 # first step: get snapshots out of the way
266 # first step: get snapshots out of the way
267 for idx, r in iterrevs:
267 for idx, r in iterrevs:
268 span = revlog.end(r) - startdata
268 span = revlog.end(r) - startdata
269 snapshot = revlog.issnapshot(r)
269 snapshot = revlog.issnapshot(r)
270 if span <= targetsize and snapshot:
270 if span <= targetsize and snapshot:
271 endrevidx = idx + 1
271 endrevidx = idx + 1
272 else:
272 else:
273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
274 if chunk:
274 if chunk:
275 yield chunk
275 yield chunk
276 startrevidx = idx
276 startrevidx = idx
277 startdata = revlog.start(r)
277 startdata = revlog.start(r)
278 endrevidx = idx + 1
278 endrevidx = idx + 1
279 if not snapshot:
279 if not snapshot:
280 break
280 break
281
281
282 # for the others, we use binary slicing to quickly converge toward valid
282 # for the others, we use binary slicing to quickly converge toward valid
283 # chunks (otherwise, we might end up looking for start/end of many
283 # chunks (otherwise, we might end up looking for start/end of many
284 # revisions). This logic is not looking for the perfect slicing point, it
284 # revisions). This logic is not looking for the perfect slicing point, it
285 # focuses on quickly converging toward valid chunks.
285 # focuses on quickly converging toward valid chunks.
286 nbitem = len(revs)
286 nbitem = len(revs)
287 while (enddata - startdata) > targetsize:
287 while (enddata - startdata) > targetsize:
288 endrevidx = nbitem
288 endrevidx = nbitem
289 if nbitem - startrevidx <= 1:
289 if nbitem - startrevidx <= 1:
290 break # protect against individual chunk larger than limit
290 break # protect against individual chunk larger than limit
291 localenddata = revlog.end(revs[endrevidx - 1])
291 localenddata = revlog.end(revs[endrevidx - 1])
292 span = localenddata - startdata
292 span = localenddata - startdata
293 while span > targetsize:
293 while span > targetsize:
294 if endrevidx - startrevidx <= 1:
294 if endrevidx - startrevidx <= 1:
295 break # protect against individual chunk larger than limit
295 break # protect against individual chunk larger than limit
296 endrevidx -= (endrevidx - startrevidx) // 2
296 endrevidx -= (endrevidx - startrevidx) // 2
297 localenddata = revlog.end(revs[endrevidx - 1])
297 localenddata = revlog.end(revs[endrevidx - 1])
298 span = localenddata - startdata
298 span = localenddata - startdata
299 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
299 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
300 if chunk:
300 if chunk:
301 yield chunk
301 yield chunk
302 startrevidx = endrevidx
302 startrevidx = endrevidx
303 startdata = revlog.start(revs[startrevidx])
303 startdata = revlog.start(revs[startrevidx])
304
304
305 chunk = _trimchunk(revlog, revs, startrevidx)
305 chunk = _trimchunk(revlog, revs, startrevidx)
306 if chunk:
306 if chunk:
307 yield chunk
307 yield chunk
308
308
309
309
310 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
310 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
311 """slice revs to reduce the amount of unrelated data to be read from disk.
311 """slice revs to reduce the amount of unrelated data to be read from disk.
312
312
313 ``revs`` is sliced into groups that should be read in one time.
313 ``revs`` is sliced into groups that should be read in one time.
314 Assume that revs are sorted.
314 Assume that revs are sorted.
315
315
316 The initial chunk is sliced until the overall density (payload/chunks-span
316 The initial chunk is sliced until the overall density (payload/chunks-span
317 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
317 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
318 skipped.
318 skipped.
319
319
320 >>> revlog = _testrevlog([
320 >>> revlog = _testrevlog([
321 ... 5, #00 (5)
321 ... 5, #00 (5)
322 ... 10, #01 (5)
322 ... 10, #01 (5)
323 ... 12, #02 (2)
323 ... 12, #02 (2)
324 ... 12, #03 (empty)
324 ... 12, #03 (empty)
325 ... 27, #04 (15)
325 ... 27, #04 (15)
326 ... 31, #05 (4)
326 ... 31, #05 (4)
327 ... 31, #06 (empty)
327 ... 31, #06 (empty)
328 ... 42, #07 (11)
328 ... 42, #07 (11)
329 ... 47, #08 (5)
329 ... 47, #08 (5)
330 ... 47, #09 (empty)
330 ... 47, #09 (empty)
331 ... 48, #10 (1)
331 ... 48, #10 (1)
332 ... 51, #11 (3)
332 ... 51, #11 (3)
333 ... 74, #12 (23)
333 ... 74, #12 (23)
334 ... 85, #13 (11)
334 ... 85, #13 (11)
335 ... 86, #14 (1)
335 ... 86, #14 (1)
336 ... 91, #15 (5)
336 ... 91, #15 (5)
337 ... ])
337 ... ])
338
338
339 >>> list(_slicechunktodensity(revlog, list(range(16))))
339 >>> list(_slicechunktodensity(revlog, list(range(16))))
340 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
340 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
341 >>> list(_slicechunktodensity(revlog, [0, 15]))
341 >>> list(_slicechunktodensity(revlog, [0, 15]))
342 [[0], [15]]
342 [[0], [15]]
343 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
343 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
344 [[0], [11], [15]]
344 [[0], [11], [15]]
345 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
345 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
346 [[0], [11, 13, 15]]
346 [[0], [11, 13, 15]]
347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
348 [[1, 2], [5, 8, 10, 11], [14]]
348 [[1, 2], [5, 8, 10, 11], [14]]
349 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
349 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
350 ... mingapsize=20))
350 ... mingapsize=20))
351 [[1, 2, 3, 5, 8, 10, 11], [14]]
351 [[1, 2, 3, 5, 8, 10, 11], [14]]
352 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
352 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
353 ... targetdensity=0.95))
353 ... targetdensity=0.95))
354 [[1, 2], [5], [8, 10, 11], [14]]
354 [[1, 2], [5], [8, 10, 11], [14]]
355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 ... targetdensity=0.95, mingapsize=12))
356 ... targetdensity=0.95, mingapsize=12))
357 [[1, 2], [5, 8, 10, 11], [14]]
357 [[1, 2], [5, 8, 10, 11], [14]]
358 """
358 """
359 start = revlog.start
359 start = revlog.start
360 length = revlog.length
360 length = revlog.length
361
361
362 if len(revs) <= 1:
362 if len(revs) <= 1:
363 yield revs
363 yield revs
364 return
364 return
365
365
366 deltachainspan = segmentspan(revlog, revs)
366 deltachainspan = segmentspan(revlog, revs)
367
367
368 if deltachainspan < mingapsize:
368 if deltachainspan < mingapsize:
369 yield revs
369 yield revs
370 return
370 return
371
371
372 readdata = deltachainspan
372 readdata = deltachainspan
373 chainpayload = sum(length(r) for r in revs)
373 chainpayload = sum(length(r) for r in revs)
374
374
375 if deltachainspan:
375 if deltachainspan:
376 density = chainpayload / float(deltachainspan)
376 density = chainpayload / float(deltachainspan)
377 else:
377 else:
378 density = 1.0
378 density = 1.0
379
379
380 if density >= targetdensity:
380 if density >= targetdensity:
381 yield revs
381 yield revs
382 return
382 return
383
383
384 # Store the gaps in a heap to have them sorted by decreasing size
384 # Store the gaps in a heap to have them sorted by decreasing size
385 gaps = []
385 gaps = []
386 prevend = None
386 prevend = None
387 for i, rev in enumerate(revs):
387 for i, rev in enumerate(revs):
388 revstart = start(rev)
388 revstart = start(rev)
389 revlen = length(rev)
389 revlen = length(rev)
390
390
391 # Skip empty revisions to form larger holes
391 # Skip empty revisions to form larger holes
392 if revlen == 0:
392 if revlen == 0:
393 continue
393 continue
394
394
395 if prevend is not None:
395 if prevend is not None:
396 gapsize = revstart - prevend
396 gapsize = revstart - prevend
397 # only consider holes that are large enough
397 # only consider holes that are large enough
398 if gapsize > mingapsize:
398 if gapsize > mingapsize:
399 gaps.append((gapsize, i))
399 gaps.append((gapsize, i))
400
400
401 prevend = revstart + revlen
401 prevend = revstart + revlen
402 # sort the gaps to pop them from largest to small
402 # sort the gaps to pop them from largest to small
403 gaps.sort()
403 gaps.sort()
404
404
405 # Collect the indices of the largest holes until the density is acceptable
405 # Collect the indices of the largest holes until the density is acceptable
406 selected = []
406 selected = []
407 while gaps and density < targetdensity:
407 while gaps and density < targetdensity:
408 gapsize, gapidx = gaps.pop()
408 gapsize, gapidx = gaps.pop()
409
409
410 selected.append(gapidx)
410 selected.append(gapidx)
411
411
412 # the gap sizes are stored as negatives to be sorted decreasingly
412 # the gap sizes are stored as negatives to be sorted decreasingly
413 # by the heap
413 # by the heap
414 readdata -= gapsize
414 readdata -= gapsize
415 if readdata > 0:
415 if readdata > 0:
416 density = chainpayload / float(readdata)
416 density = chainpayload / float(readdata)
417 else:
417 else:
418 density = 1.0
418 density = 1.0
419 selected.sort()
419 selected.sort()
420
420
421 # Cut the revs at collected indices
421 # Cut the revs at collected indices
422 previdx = 0
422 previdx = 0
423 for idx in selected:
423 for idx in selected:
424
424
425 chunk = _trimchunk(revlog, revs, previdx, idx)
425 chunk = _trimchunk(revlog, revs, previdx, idx)
426 if chunk:
426 if chunk:
427 yield chunk
427 yield chunk
428
428
429 previdx = idx
429 previdx = idx
430
430
431 chunk = _trimchunk(revlog, revs, previdx)
431 chunk = _trimchunk(revlog, revs, previdx)
432 if chunk:
432 if chunk:
433 yield chunk
433 yield chunk
434
434
435
435
436 def _trimchunk(revlog, revs, startidx, endidx=None):
436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 """returns revs[startidx:endidx] without empty trailing revs
437 """returns revs[startidx:endidx] without empty trailing revs
438
438
439 Doctest Setup
439 Doctest Setup
440 >>> revlog = _testrevlog([
440 >>> revlog = _testrevlog([
441 ... 5, #0
441 ... 5, #0
442 ... 10, #1
442 ... 10, #1
443 ... 12, #2
443 ... 12, #2
444 ... 12, #3 (empty)
444 ... 12, #3 (empty)
445 ... 17, #4
445 ... 17, #4
446 ... 21, #5
446 ... 21, #5
447 ... 21, #6 (empty)
447 ... 21, #6 (empty)
448 ... ])
448 ... ])
449
449
450 Contiguous cases:
450 Contiguous cases:
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 [0, 1, 2, 3, 4, 5]
452 [0, 1, 2, 3, 4, 5]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 [0, 1, 2, 3, 4]
454 [0, 1, 2, 3, 4]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 [0, 1, 2]
456 [0, 1, 2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 [2]
458 [2]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 [3, 4, 5]
460 [3, 4, 5]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 [3, 4]
462 [3, 4]
463
463
464 Discontiguous cases:
464 Discontiguous cases:
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 [1, 3, 5]
466 [1, 3, 5]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 [1]
468 [1]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 [3, 5]
470 [3, 5]
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 [3, 5]
472 [3, 5]
473 """
473 """
474 length = revlog.length
474 length = revlog.length
475
475
476 if endidx is None:
476 if endidx is None:
477 endidx = len(revs)
477 endidx = len(revs)
478
478
479 # If we have a non-emtpy delta candidate, there are nothing to trim
479 # If we have a non-emtpy delta candidate, there are nothing to trim
480 if revs[endidx - 1] < len(revlog):
480 if revs[endidx - 1] < len(revlog):
481 # Trim empty revs at the end, except the very first revision of a chain
481 # Trim empty revs at the end, except the very first revision of a chain
482 while (
482 while (
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 ):
484 ):
485 endidx -= 1
485 endidx -= 1
486
486
487 return revs[startidx:endidx]
487 return revs[startidx:endidx]
488
488
489
489
def segmentspan(revlog, revs):
    """Return the number of on-disk bytes spanned by a segment of revisions.

    ``revs`` is a sorted array of revision numbers.  The span runs from the
    start offset of the first revision to the end offset of the last one,
    gaps included.  An empty segment spans zero bytes.

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if revs:
        return revlog.end(revs[-1]) - revlog.start(revs[0])
    return 0
518
518
519
519
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is an open file handle forwarded to ``revlog.revision`` (as
    ``_df``) when the base revision has to be read.  ``delta`` is the
    binary delta against revision ``baserev``; ``p1``, ``p2`` and
    ``expectednode`` are used to validate the hash of the rebuilt text,
    ``flags`` are the revision's storage flags.

    Raises ``error.StorageError`` when ``flags`` marks the revision as
    censored but the reconstructed content is not recognized as such.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full-replacement delta: everything after the header already is
        # the full text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
549
549
550
550
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # Immutable record describing one candidate delta for a revision.
    #
    # distance from the base revision (bounded by the span checks in
    # `is_good_delta_info`)
    distance = attr.ib()
    # size of this delta alone, compared against the full text length
    deltalen = attr.ib()
    # (header, payload) pair of compressed delta data; a b'u' header means
    # "stored uncompressed" (see `drop_u_compression`)
    data = attr.ib()
    # revision number of the delta base
    base = attr.ib()
    # base revision of the full delta chain — NOTE(review): not consulted in
    # the checks visible here; confirm semantics against the revlog code
    chainbase = attr.ib()
    # number of deltas in the chain (checked against `_maxchainlen`)
    chainlen = attr.ib()
    # cumulated compressed size of the deltas to apply
    compresseddeltalen = attr.ib()
    # snapshot depth when the revision is an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
561
561
562
562
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.
    """
    header, payload = delta.data
    assert header == b'u', header
    # Rebuild the (frozen) info record: drop the one-byte b'u' marker from
    # the payload header and shrink the recorded delta length accordingly.
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', payload),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
579
579
580
580
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant.

    ``deltainfo`` is a ``_deltainfo`` candidate (or None, which is never
    good); ``revinfo`` carries the target revision's ``textlen`` and an
    optional ``cachedelta`` hint."""
    if deltainfo is None:
        return False

    # A delta base explicitly forced by the cached delta is always accepted.
    if (
        revinfo.cachedelta is not None
        and deltainfo.base == revinfo.cachedelta[0]
        and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        return True

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The
    # limit exist to prevent endless chain of intermediate delta to be
    # created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
659
659
660
660
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
665
665
666
666
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator driven through ``send()``: each yielded tuple of
    candidate base revisions must be answered with the revision that gave a
    good delta (or None), which steers further refinement and the chunked
    emission below.  A final None is yielded once no candidate remains.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if target_rev is None:
        target_rev = len(revlog)

    if not revlog._generaldelta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return

    if (
        cachedelta is not None
        and nullrev == cachedelta[0]
        and cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        # instruction are to forcibly do a full snapshot
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    # `tested` accumulates every revision already considered (or rejected)
    # so the same base is never offered twice.
    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            if (
                cachedelta is not None
                and rev == cachedelta[0]
                and cachedelta[2] == DELTA_BASE_REUSE_FORCE
            ):
                # instructions are to forcibly consider/use this delta base
                group.append(rev)
                continue

            # an higher authority deamed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        break

    yield None
832
838
833
839
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Yield groups of candidate delta bases, refined from caller feedback.

    Generator driven through ``send()``: the value sent back after each
    yield is the revision accepted as a good base (or None).  The cached
    delta base is offered first (when reuse is allowed), then the raw
    groups from ``_rawgroups``; on sparse revlogs an accepted snapshot
    base is further refined by walking its chain down, then its snapshot
    children up.  Terminates by yielding None.
    """
    good = None
    # First we try to reuse a the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
897
903
898
904
899 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
905 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
900 """Provides group of revision to be tested as delta base
906 """Provides group of revision to be tested as delta base
901
907
902 This lower level function focus on emitting delta theorically interresting
908 This lower level function focus on emitting delta theorically interresting
903 without looking it any practical details.
909 without looking it any practical details.
904
910
905 The group order aims at providing fast or small candidates first.
911 The group order aims at providing fast or small candidates first.
906 """
912 """
907 gdelta = revlog._generaldelta
913 gdelta = revlog._generaldelta
908 # gate sparse behind general-delta because of issue6056
914 # gate sparse behind general-delta because of issue6056
909 sparse = gdelta and revlog._sparserevlog
915 sparse = gdelta and revlog._sparserevlog
910 curr = len(revlog)
916 curr = len(revlog)
911 prev = curr - 1
917 prev = curr - 1
912 deltachain = lambda rev: revlog._deltachain(rev)[0]
918 deltachain = lambda rev: revlog._deltachain(rev)[0]
913
919
914 if gdelta:
920 if gdelta:
915 # exclude already lazy tested base if any
921 # exclude already lazy tested base if any
916 parents = [p for p in (p1, p2) if p != nullrev]
922 parents = [p for p in (p1, p2) if p != nullrev]
917
923
918 if not revlog._deltabothparents and len(parents) == 2:
924 if not revlog._deltabothparents and len(parents) == 2:
919 parents.sort()
925 parents.sort()
920 # To minimize the chance of having to build a fulltext,
926 # To minimize the chance of having to build a fulltext,
921 # pick first whichever parent is closest to us (max rev)
927 # pick first whichever parent is closest to us (max rev)
922 yield (parents[1],)
928 yield (parents[1],)
923 # then the other one (min rev) if the first did not fit
929 # then the other one (min rev) if the first did not fit
924 yield (parents[0],)
930 yield (parents[0],)
925 elif len(parents) > 0:
931 elif len(parents) > 0:
926 # Test all parents (1 or 2), and keep the best candidate
932 # Test all parents (1 or 2), and keep the best candidate
927 yield parents
933 yield parents
928
934
929 if sparse and parents:
935 if sparse and parents:
930 if snapshot_cache is None:
936 if snapshot_cache is None:
931 # map: base-rev: [snapshot-revs]
937 # map: base-rev: [snapshot-revs]
932 snapshot_cache = SnapshotCache()
938 snapshot_cache = SnapshotCache()
933 # See if we can use an existing snapshot in the parent chains to use as
939 # See if we can use an existing snapshot in the parent chains to use as
934 # a base for a new intermediate-snapshot
940 # a base for a new intermediate-snapshot
935 #
941 #
936 # search for snapshot in parents delta chain
942 # search for snapshot in parents delta chain
937 # map: snapshot-level: snapshot-rev
943 # map: snapshot-level: snapshot-rev
938 parents_snaps = collections.defaultdict(set)
944 parents_snaps = collections.defaultdict(set)
939 candidate_chains = [deltachain(p) for p in parents]
945 candidate_chains = [deltachain(p) for p in parents]
940 for chain in candidate_chains:
946 for chain in candidate_chains:
941 for idx, s in enumerate(chain):
947 for idx, s in enumerate(chain):
942 if not revlog.issnapshot(s):
948 if not revlog.issnapshot(s):
943 break
949 break
944 parents_snaps[idx].add(s)
950 parents_snaps[idx].add(s)
945 snapfloor = min(parents_snaps[0]) + 1
951 snapfloor = min(parents_snaps[0]) + 1
946 snapshot_cache.update(revlog, snapfloor)
952 snapshot_cache.update(revlog, snapfloor)
947 # search for the highest "unrelated" revision
953 # search for the highest "unrelated" revision
948 #
954 #
949 # Adding snapshots used by "unrelated" revision increase the odd we
955 # Adding snapshots used by "unrelated" revision increase the odd we
950 # reuse an independant, yet better snapshot chain.
956 # reuse an independant, yet better snapshot chain.
951 #
957 #
952 # XXX instead of building a set of revisions, we could lazily enumerate
958 # XXX instead of building a set of revisions, we could lazily enumerate
953 # over the chains. That would be more efficient, however we stick to
959 # over the chains. That would be more efficient, however we stick to
954 # simple code for now.
960 # simple code for now.
955 all_revs = set()
961 all_revs = set()
956 for chain in candidate_chains:
962 for chain in candidate_chains:
957 all_revs.update(chain)
963 all_revs.update(chain)
958 other = None
964 other = None
959 for r in revlog.revs(prev, snapfloor):
965 for r in revlog.revs(prev, snapfloor):
960 if r not in all_revs:
966 if r not in all_revs:
961 other = r
967 other = r
962 break
968 break
963 if other is not None:
969 if other is not None:
964 # To avoid unfair competition, we won't use unrelated intermediate
970 # To avoid unfair competition, we won't use unrelated intermediate
965 # snapshot that are deeper than the ones from the parent delta
971 # snapshot that are deeper than the ones from the parent delta
966 # chain.
972 # chain.
967 max_depth = max(parents_snaps.keys())
973 max_depth = max(parents_snaps.keys())
968 chain = deltachain(other)
974 chain = deltachain(other)
969 for depth, s in enumerate(chain):
975 for depth, s in enumerate(chain):
970 if s < snapfloor:
976 if s < snapfloor:
971 continue
977 continue
972 if max_depth < depth:
978 if max_depth < depth:
973 break
979 break
974 if not revlog.issnapshot(s):
980 if not revlog.issnapshot(s):
975 break
981 break
976 parents_snaps[depth].add(s)
982 parents_snaps[depth].add(s)
977 # Test them as possible intermediate snapshot base
983 # Test them as possible intermediate snapshot base
978 # We test them from highest to lowest level. High level one are more
984 # We test them from highest to lowest level. High level one are more
979 # likely to result in small delta
985 # likely to result in small delta
980 floor = None
986 floor = None
981 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
987 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
982 siblings = set()
988 siblings = set()
983 for s in snaps:
989 for s in snaps:
984 siblings.update(snapshot_cache.snapshots[s])
990 siblings.update(snapshot_cache.snapshots[s])
985 # Before considering making a new intermediate snapshot, we check
991 # Before considering making a new intermediate snapshot, we check
986 # if an existing snapshot, children of base we consider, would be
992 # if an existing snapshot, children of base we consider, would be
987 # suitable.
993 # suitable.
988 #
994 #
989 # It give a change to reuse a delta chain "unrelated" to the
995 # It give a change to reuse a delta chain "unrelated" to the
990 # current revision instead of starting our own. Without such
996 # current revision instead of starting our own. Without such
991 # re-use, topological branches would keep reopening new chains.
997 # re-use, topological branches would keep reopening new chains.
992 # Creating more and more snapshot as the repository grow.
998 # Creating more and more snapshot as the repository grow.
993
999
994 if floor is not None:
1000 if floor is not None:
995 # We only do this for siblings created after the one in our
1001 # We only do this for siblings created after the one in our
996 # parent's delta chain. Those created before has less chances
1002 # parent's delta chain. Those created before has less chances
997 # to be valid base since our ancestors had to create a new
1003 # to be valid base since our ancestors had to create a new
998 # snapshot.
1004 # snapshot.
999 siblings = [r for r in siblings if floor < r]
1005 siblings = [r for r in siblings if floor < r]
1000 yield tuple(sorted(siblings))
1006 yield tuple(sorted(siblings))
1001 # then test the base from our parent's delta chain.
1007 # then test the base from our parent's delta chain.
1002 yield tuple(sorted(snaps))
1008 yield tuple(sorted(snaps))
1003 floor = min(snaps)
1009 floor = min(snaps)
1004 # No suitable base found in the parent chain, search if any full
1010 # No suitable base found in the parent chain, search if any full
1005 # snapshots emitted since parent's base would be a suitable base for an
1011 # snapshots emitted since parent's base would be a suitable base for an
1006 # intermediate snapshot.
1012 # intermediate snapshot.
1007 #
1013 #
1008 # It give a chance to reuse a delta chain unrelated to the current
1014 # It give a chance to reuse a delta chain unrelated to the current
1009 # revisions instead of starting our own. Without such re-use,
1015 # revisions instead of starting our own. Without such re-use,
1010 # topological branches would keep reopening new full chains. Creating
1016 # topological branches would keep reopening new full chains. Creating
1011 # more and more snapshot as the repository grow.
1017 # more and more snapshot as the repository grow.
1012 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1018 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1013 yield tuple(sorted(full))
1019 yield tuple(sorted(full))
1014
1020
1015 if not sparse:
1021 if not sparse:
1016 # other approach failed try against prev to hopefully save us a
1022 # other approach failed try against prev to hopefully save us a
1017 # fulltext.
1023 # fulltext.
1018 yield (prev,)
1024 yield (prev,)
1019
1025
1020
1026
1021 class SnapshotCache:
1027 class SnapshotCache:
1022 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1028 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1023
1029
1024 def __init__(self):
1030 def __init__(self):
1025 self.snapshots = collections.defaultdict(set)
1031 self.snapshots = collections.defaultdict(set)
1026 self._start_rev = None
1032 self._start_rev = None
1027 self._end_rev = None
1033 self._end_rev = None
1028
1034
1029 def update(self, revlog, start_rev=0):
1035 def update(self, revlog, start_rev=0):
1030 """find snapshots from start_rev to tip"""
1036 """find snapshots from start_rev to tip"""
1031 nb_revs = len(revlog)
1037 nb_revs = len(revlog)
1032 end_rev = nb_revs - 1
1038 end_rev = nb_revs - 1
1033 if start_rev > end_rev:
1039 if start_rev > end_rev:
1034 return # range is empty
1040 return # range is empty
1035
1041
1036 if self._start_rev is None:
1042 if self._start_rev is None:
1037 assert self._end_rev is None
1043 assert self._end_rev is None
1038 self._update(revlog, start_rev, end_rev)
1044 self._update(revlog, start_rev, end_rev)
1039 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1045 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1040 if start_rev < self._start_rev:
1046 if start_rev < self._start_rev:
1041 self._update(revlog, start_rev, self._start_rev - 1)
1047 self._update(revlog, start_rev, self._start_rev - 1)
1042 if self._end_rev < end_rev:
1048 if self._end_rev < end_rev:
1043 self._update(revlog, self._end_rev + 1, end_rev)
1049 self._update(revlog, self._end_rev + 1, end_rev)
1044
1050
1045 if self._start_rev is None:
1051 if self._start_rev is None:
1046 assert self._end_rev is None
1052 assert self._end_rev is None
1047 self._end_rev = end_rev
1053 self._end_rev = end_rev
1048 self._start_rev = start_rev
1054 self._start_rev = start_rev
1049 else:
1055 else:
1050 self._start_rev = min(self._start_rev, start_rev)
1056 self._start_rev = min(self._start_rev, start_rev)
1051 self._end_rev = max(self._end_rev, end_rev)
1057 self._end_rev = max(self._end_rev, end_rev)
1052 assert self._start_rev <= self._end_rev, (
1058 assert self._start_rev <= self._end_rev, (
1053 self._start_rev,
1059 self._start_rev,
1054 self._end_rev,
1060 self._end_rev,
1055 )
1061 )
1056
1062
1057 def _update(self, revlog, start_rev, end_rev):
1063 def _update(self, revlog, start_rev, end_rev):
1058 """internal method that actually do update content"""
1064 """internal method that actually do update content"""
1059 assert self._start_rev is None or (
1065 assert self._start_rev is None or (
1060 start_rev < self._start_rev or start_rev > self._end_rev
1066 start_rev < self._start_rev or start_rev > self._end_rev
1061 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1067 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1062 assert self._start_rev is None or (
1068 assert self._start_rev is None or (
1063 end_rev < self._start_rev or end_rev > self._end_rev
1069 end_rev < self._start_rev or end_rev > self._end_rev
1064 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1070 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1065 cache = self.snapshots
1071 cache = self.snapshots
1066 if util.safehasattr(revlog.index, b'findsnapshots'):
1072 if util.safehasattr(revlog.index, b'findsnapshots'):
1067 revlog.index.findsnapshots(cache, start_rev, end_rev)
1073 revlog.index.findsnapshots(cache, start_rev, end_rev)
1068 else:
1074 else:
1069 deltaparent = revlog.deltaparent
1075 deltaparent = revlog.deltaparent
1070 issnapshot = revlog.issnapshot
1076 issnapshot = revlog.issnapshot
1071 for rev in revlog.revs(start_rev, end_rev):
1077 for rev in revlog.revs(start_rev, end_rev):
1072 if issnapshot(rev):
1078 if issnapshot(rev):
1073 cache[deltaparent(rev)].add(rev)
1079 cache[deltaparent(rev)].add(rev)
1074
1080
1075
1081
1076 class deltacomputer:
1082 class deltacomputer:
1077 def __init__(
1083 def __init__(
1078 self,
1084 self,
1079 revlog,
1085 revlog,
1080 write_debug=None,
1086 write_debug=None,
1081 debug_search=False,
1087 debug_search=False,
1082 debug_info=None,
1088 debug_info=None,
1083 ):
1089 ):
1084 self.revlog = revlog
1090 self.revlog = revlog
1085 self._write_debug = write_debug
1091 self._write_debug = write_debug
1086 self._debug_search = debug_search
1092 self._debug_search = debug_search
1087 self._debug_info = debug_info
1093 self._debug_info = debug_info
1088 self._snapshot_cache = SnapshotCache()
1094 self._snapshot_cache = SnapshotCache()
1089
1095
1090 def buildtext(self, revinfo, fh):
1096 def buildtext(self, revinfo, fh):
1091 """Builds a fulltext version of a revision
1097 """Builds a fulltext version of a revision
1092
1098
1093 revinfo: revisioninfo instance that contains all needed info
1099 revinfo: revisioninfo instance that contains all needed info
1094 fh: file handle to either the .i or the .d revlog file,
1100 fh: file handle to either the .i or the .d revlog file,
1095 depending on whether it is inlined or not
1101 depending on whether it is inlined or not
1096 """
1102 """
1097 btext = revinfo.btext
1103 btext = revinfo.btext
1098 if btext[0] is not None:
1104 if btext[0] is not None:
1099 return btext[0]
1105 return btext[0]
1100
1106
1101 revlog = self.revlog
1107 revlog = self.revlog
1102 cachedelta = revinfo.cachedelta
1108 cachedelta = revinfo.cachedelta
1103 baserev = cachedelta[0]
1109 baserev = cachedelta[0]
1104 delta = cachedelta[1]
1110 delta = cachedelta[1]
1105
1111
1106 fulltext = btext[0] = _textfromdelta(
1112 fulltext = btext[0] = _textfromdelta(
1107 fh,
1113 fh,
1108 revlog,
1114 revlog,
1109 baserev,
1115 baserev,
1110 delta,
1116 delta,
1111 revinfo.p1,
1117 revinfo.p1,
1112 revinfo.p2,
1118 revinfo.p2,
1113 revinfo.flags,
1119 revinfo.flags,
1114 revinfo.node,
1120 revinfo.node,
1115 )
1121 )
1116 return fulltext
1122 return fulltext
1117
1123
1118 def _builddeltadiff(self, base, revinfo, fh):
1124 def _builddeltadiff(self, base, revinfo, fh):
1119 revlog = self.revlog
1125 revlog = self.revlog
1120 t = self.buildtext(revinfo, fh)
1126 t = self.buildtext(revinfo, fh)
1121 if revlog.iscensored(base):
1127 if revlog.iscensored(base):
1122 # deltas based on a censored revision must replace the
1128 # deltas based on a censored revision must replace the
1123 # full content in one patch, so delta works everywhere
1129 # full content in one patch, so delta works everywhere
1124 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1130 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1125 delta = header + t
1131 delta = header + t
1126 else:
1132 else:
1127 ptext = revlog.rawdata(base, _df=fh)
1133 ptext = revlog.rawdata(base, _df=fh)
1128 delta = mdiff.textdiff(ptext, t)
1134 delta = mdiff.textdiff(ptext, t)
1129
1135
1130 return delta
1136 return delta
1131
1137
1132 def _builddeltainfo(self, revinfo, base, fh):
1138 def _builddeltainfo(self, revinfo, base, fh):
1133 # can we use the cached delta?
1139 # can we use the cached delta?
1134 revlog = self.revlog
1140 revlog = self.revlog
1135 debug_search = self._write_debug is not None and self._debug_search
1141 debug_search = self._write_debug is not None and self._debug_search
1136 chainbase = revlog.chainbase(base)
1142 chainbase = revlog.chainbase(base)
1137 if revlog._generaldelta:
1143 if revlog._generaldelta:
1138 deltabase = base
1144 deltabase = base
1139 else:
1145 else:
1140 deltabase = chainbase
1146 deltabase = chainbase
1141 snapshotdepth = None
1147 snapshotdepth = None
1142 if revlog._sparserevlog and deltabase == nullrev:
1148 if revlog._sparserevlog and deltabase == nullrev:
1143 snapshotdepth = 0
1149 snapshotdepth = 0
1144 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1150 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1145 # A delta chain should always be one full snapshot,
1151 # A delta chain should always be one full snapshot,
1146 # zero or more semi-snapshots, and zero or more deltas
1152 # zero or more semi-snapshots, and zero or more deltas
1147 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1153 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1148 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1154 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1149 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1155 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1150 delta = None
1156 delta = None
1151 if revinfo.cachedelta:
1157 if revinfo.cachedelta:
1152 cachebase = revinfo.cachedelta[0]
1158 cachebase = revinfo.cachedelta[0]
1153 # check if the diff still apply
1159 # check if the diff still apply
1154 currentbase = cachebase
1160 currentbase = cachebase
1155 while (
1161 while (
1156 currentbase != nullrev
1162 currentbase != nullrev
1157 and currentbase != base
1163 and currentbase != base
1158 and self.revlog.length(currentbase) == 0
1164 and self.revlog.length(currentbase) == 0
1159 ):
1165 ):
1160 currentbase = self.revlog.deltaparent(currentbase)
1166 currentbase = self.revlog.deltaparent(currentbase)
1161 if self.revlog._lazydelta and currentbase == base:
1167 if self.revlog._lazydelta and currentbase == base:
1162 delta = revinfo.cachedelta[1]
1168 delta = revinfo.cachedelta[1]
1163 if delta is None:
1169 if delta is None:
1164 delta = self._builddeltadiff(base, revinfo, fh)
1170 delta = self._builddeltadiff(base, revinfo, fh)
1165 if debug_search:
1171 if debug_search:
1166 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1172 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1167 msg %= len(delta)
1173 msg %= len(delta)
1168 self._write_debug(msg)
1174 self._write_debug(msg)
1169 # snapshotdept need to be neither None nor 0 level snapshot
1175 # snapshotdept need to be neither None nor 0 level snapshot
1170 if revlog.upperboundcomp is not None and snapshotdepth:
1176 if revlog.upperboundcomp is not None and snapshotdepth:
1171 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1177 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1172 snapshotlimit = revinfo.textlen >> snapshotdepth
1178 snapshotlimit = revinfo.textlen >> snapshotdepth
1173 if debug_search:
1179 if debug_search:
1174 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1180 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1175 msg %= lowestrealisticdeltalen
1181 msg %= lowestrealisticdeltalen
1176 self._write_debug(msg)
1182 self._write_debug(msg)
1177 if snapshotlimit < lowestrealisticdeltalen:
1183 if snapshotlimit < lowestrealisticdeltalen:
1178 if debug_search:
1184 if debug_search:
1179 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1185 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1180 self._write_debug(msg)
1186 self._write_debug(msg)
1181 return None
1187 return None
1182 if revlog.length(base) < lowestrealisticdeltalen:
1188 if revlog.length(base) < lowestrealisticdeltalen:
1183 if debug_search:
1189 if debug_search:
1184 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1190 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1185 self._write_debug(msg)
1191 self._write_debug(msg)
1186 return None
1192 return None
1187 header, data = revlog.compress(delta)
1193 header, data = revlog.compress(delta)
1188 deltalen = len(header) + len(data)
1194 deltalen = len(header) + len(data)
1189 offset = revlog.end(len(revlog) - 1)
1195 offset = revlog.end(len(revlog) - 1)
1190 dist = deltalen + offset - revlog.start(chainbase)
1196 dist = deltalen + offset - revlog.start(chainbase)
1191 chainlen, compresseddeltalen = revlog._chaininfo(base)
1197 chainlen, compresseddeltalen = revlog._chaininfo(base)
1192 chainlen += 1
1198 chainlen += 1
1193 compresseddeltalen += deltalen
1199 compresseddeltalen += deltalen
1194
1200
1195 return _deltainfo(
1201 return _deltainfo(
1196 dist,
1202 dist,
1197 deltalen,
1203 deltalen,
1198 (header, data),
1204 (header, data),
1199 deltabase,
1205 deltabase,
1200 chainbase,
1206 chainbase,
1201 chainlen,
1207 chainlen,
1202 compresseddeltalen,
1208 compresseddeltalen,
1203 snapshotdepth,
1209 snapshotdepth,
1204 )
1210 )
1205
1211
1206 def _fullsnapshotinfo(self, fh, revinfo, curr):
1212 def _fullsnapshotinfo(self, fh, revinfo, curr):
1207 rawtext = self.buildtext(revinfo, fh)
1213 rawtext = self.buildtext(revinfo, fh)
1208 data = self.revlog.compress(rawtext)
1214 data = self.revlog.compress(rawtext)
1209 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1215 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1210 deltabase = chainbase = curr
1216 deltabase = chainbase = curr
1211 snapshotdepth = 0
1217 snapshotdepth = 0
1212 chainlen = 1
1218 chainlen = 1
1213
1219
1214 return _deltainfo(
1220 return _deltainfo(
1215 dist,
1221 dist,
1216 deltalen,
1222 deltalen,
1217 data,
1223 data,
1218 deltabase,
1224 deltabase,
1219 chainbase,
1225 chainbase,
1220 chainlen,
1226 chainlen,
1221 compresseddeltalen,
1227 compresseddeltalen,
1222 snapshotdepth,
1228 snapshotdepth,
1223 )
1229 )
1224
1230
1225 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1231 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1226 """Find an acceptable delta against a candidate revision
1232 """Find an acceptable delta against a candidate revision
1227
1233
1228 revinfo: information about the revision (instance of _revisioninfo)
1234 revinfo: information about the revision (instance of _revisioninfo)
1229 fh: file handle to either the .i or the .d revlog file,
1235 fh: file handle to either the .i or the .d revlog file,
1230 depending on whether it is inlined or not
1236 depending on whether it is inlined or not
1231
1237
1232 Returns the first acceptable candidate revision, as ordered by
1238 Returns the first acceptable candidate revision, as ordered by
1233 _candidategroups
1239 _candidategroups
1234
1240
1235 If no suitable deltabase is found, we return delta info for a full
1241 If no suitable deltabase is found, we return delta info for a full
1236 snapshot.
1242 snapshot.
1237
1243
1238 `excluded_bases` is an optional set of revision that cannot be used as
1244 `excluded_bases` is an optional set of revision that cannot be used as
1239 a delta base. Use this to recompute delta suitable in censor or strip
1245 a delta base. Use this to recompute delta suitable in censor or strip
1240 context.
1246 context.
1241 """
1247 """
1242 if target_rev is None:
1248 if target_rev is None:
1243 target_rev = len(self.revlog)
1249 target_rev = len(self.revlog)
1244
1250
1245 if not revinfo.textlen:
1251 if not revinfo.textlen:
1246 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1252 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1247
1253
1248 if excluded_bases is None:
1254 if excluded_bases is None:
1249 excluded_bases = set()
1255 excluded_bases = set()
1250
1256
1251 # no delta for flag processor revision (see "candelta" for why)
1257 # no delta for flag processor revision (see "candelta" for why)
1252 # not calling candelta since only one revision needs test, also to
1258 # not calling candelta since only one revision needs test, also to
1253 # avoid overhead fetching flags again.
1259 # avoid overhead fetching flags again.
1254 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1260 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1255 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1261 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1256
1262
1257 gather_debug = (
1263 gather_debug = (
1258 self._write_debug is not None or self._debug_info is not None
1264 self._write_debug is not None or self._debug_info is not None
1259 )
1265 )
1260 debug_search = self._write_debug is not None and self._debug_search
1266 debug_search = self._write_debug is not None and self._debug_search
1261
1267
1262 if gather_debug:
1268 if gather_debug:
1263 start = util.timer()
1269 start = util.timer()
1264
1270
1265 # count the number of different delta we tried (for debug purpose)
1271 # count the number of different delta we tried (for debug purpose)
1266 dbg_try_count = 0
1272 dbg_try_count = 0
1267 # count the number of "search round" we did. (for debug purpose)
1273 # count the number of "search round" we did. (for debug purpose)
1268 dbg_try_rounds = 0
1274 dbg_try_rounds = 0
1269 dbg_type = b'unknown'
1275 dbg_type = b'unknown'
1270
1276
1271 cachedelta = revinfo.cachedelta
1277 cachedelta = revinfo.cachedelta
1272 p1 = revinfo.p1
1278 p1 = revinfo.p1
1273 p2 = revinfo.p2
1279 p2 = revinfo.p2
1274 revlog = self.revlog
1280 revlog = self.revlog
1275
1281
1276 deltainfo = None
1282 deltainfo = None
1277 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1283 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1278
1284
1279 if gather_debug:
1285 if gather_debug:
1280 if p1r != nullrev:
1286 if p1r != nullrev:
1281 p1_chain_len = revlog._chaininfo(p1r)[0]
1287 p1_chain_len = revlog._chaininfo(p1r)[0]
1282 else:
1288 else:
1283 p1_chain_len = -1
1289 p1_chain_len = -1
1284 if p2r != nullrev:
1290 if p2r != nullrev:
1285 p2_chain_len = revlog._chaininfo(p2r)[0]
1291 p2_chain_len = revlog._chaininfo(p2r)[0]
1286 else:
1292 else:
1287 p2_chain_len = -1
1293 p2_chain_len = -1
1288 if debug_search:
1294 if debug_search:
1289 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1295 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1290 msg %= target_rev
1296 msg %= target_rev
1291 self._write_debug(msg)
1297 self._write_debug(msg)
1292
1298
1293 groups = _candidategroups(
1299 groups = _candidategroups(
1294 self.revlog,
1300 self.revlog,
1295 revinfo.textlen,
1301 revinfo.textlen,
1296 p1r,
1302 p1r,
1297 p2r,
1303 p2r,
1298 cachedelta,
1304 cachedelta,
1299 excluded_bases,
1305 excluded_bases,
1300 target_rev,
1306 target_rev,
1301 snapshot_cache=self._snapshot_cache,
1307 snapshot_cache=self._snapshot_cache,
1302 )
1308 )
1303 candidaterevs = next(groups)
1309 candidaterevs = next(groups)
1304 while candidaterevs is not None:
1310 while candidaterevs is not None:
1305 dbg_try_rounds += 1
1311 dbg_try_rounds += 1
1306 if debug_search:
1312 if debug_search:
1307 prev = None
1313 prev = None
1308 if deltainfo is not None:
1314 if deltainfo is not None:
1309 prev = deltainfo.base
1315 prev = deltainfo.base
1310
1316
1311 if (
1317 if (
1312 cachedelta is not None
1318 cachedelta is not None
1313 and len(candidaterevs) == 1
1319 and len(candidaterevs) == 1
1314 and cachedelta[0] in candidaterevs
1320 and cachedelta[0] in candidaterevs
1315 ):
1321 ):
1316 round_type = b"cached-delta"
1322 round_type = b"cached-delta"
1317 elif p1 in candidaterevs or p2 in candidaterevs:
1323 elif p1 in candidaterevs or p2 in candidaterevs:
1318 round_type = b"parents"
1324 round_type = b"parents"
1319 elif prev is not None and all(c < prev for c in candidaterevs):
1325 elif prev is not None and all(c < prev for c in candidaterevs):
1320 round_type = b"refine-down"
1326 round_type = b"refine-down"
1321 elif prev is not None and all(c > prev for c in candidaterevs):
1327 elif prev is not None and all(c > prev for c in candidaterevs):
1322 round_type = b"refine-up"
1328 round_type = b"refine-up"
1323 else:
1329 else:
1324 round_type = b"search-down"
1330 round_type = b"search-down"
1325 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1331 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1326 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1332 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1327 self._write_debug(msg)
1333 self._write_debug(msg)
1328 nominateddeltas = []
1334 nominateddeltas = []
1329 if deltainfo is not None:
1335 if deltainfo is not None:
1330 if debug_search:
1336 if debug_search:
1331 msg = (
1337 msg = (
1332 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1338 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1333 )
1339 )
1334 msg %= (deltainfo.base, deltainfo.deltalen)
1340 msg %= (deltainfo.base, deltainfo.deltalen)
1335 self._write_debug(msg)
1341 self._write_debug(msg)
1336 # if we already found a good delta,
1342 # if we already found a good delta,
1337 # challenge it against refined candidates
1343 # challenge it against refined candidates
1338 nominateddeltas.append(deltainfo)
1344 nominateddeltas.append(deltainfo)
1339 for candidaterev in candidaterevs:
1345 for candidaterev in candidaterevs:
1340 if debug_search:
1346 if debug_search:
1341 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1347 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1342 msg %= candidaterev
1348 msg %= candidaterev
1343 self._write_debug(msg)
1349 self._write_debug(msg)
1344 candidate_type = None
1350 candidate_type = None
1345 if candidaterev == p1:
1351 if candidaterev == p1:
1346 candidate_type = b"p1"
1352 candidate_type = b"p1"
1347 elif candidaterev == p2:
1353 elif candidaterev == p2:
1348 candidate_type = b"p2"
1354 candidate_type = b"p2"
1349 elif self.revlog.issnapshot(candidaterev):
1355 elif self.revlog.issnapshot(candidaterev):
1350 candidate_type = b"snapshot-%d"
1356 candidate_type = b"snapshot-%d"
1351 candidate_type %= self.revlog.snapshotdepth(
1357 candidate_type %= self.revlog.snapshotdepth(
1352 candidaterev
1358 candidaterev
1353 )
1359 )
1354
1360
1355 if candidate_type is not None:
1361 if candidate_type is not None:
1356 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1362 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1357 msg %= candidate_type
1363 msg %= candidate_type
1358 self._write_debug(msg)
1364 self._write_debug(msg)
1359 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1365 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1360 msg %= self.revlog.length(candidaterev)
1366 msg %= self.revlog.length(candidaterev)
1361 self._write_debug(msg)
1367 self._write_debug(msg)
1362 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1368 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1363 msg %= self.revlog.deltaparent(candidaterev)
1369 msg %= self.revlog.deltaparent(candidaterev)
1364 self._write_debug(msg)
1370 self._write_debug(msg)
1365
1371
1366 dbg_try_count += 1
1372 dbg_try_count += 1
1367
1373
1368 if debug_search:
1374 if debug_search:
1369 delta_start = util.timer()
1375 delta_start = util.timer()
1370 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1376 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1371 if debug_search:
1377 if debug_search:
1372 delta_end = util.timer()
1378 delta_end = util.timer()
1373 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1379 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1374 msg %= delta_end - delta_start
1380 msg %= delta_end - delta_start
1375 self._write_debug(msg)
1381 self._write_debug(msg)
1376 if candidatedelta is not None:
1382 if candidatedelta is not None:
1377 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1383 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1378 if debug_search:
1384 if debug_search:
1379 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1385 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1380 msg %= candidatedelta.deltalen
1386 msg %= candidatedelta.deltalen
1381 self._write_debug(msg)
1387 self._write_debug(msg)
1382 nominateddeltas.append(candidatedelta)
1388 nominateddeltas.append(candidatedelta)
1383 elif debug_search:
1389 elif debug_search:
1384 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1390 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1385 msg %= candidatedelta.deltalen
1391 msg %= candidatedelta.deltalen
1386 self._write_debug(msg)
1392 self._write_debug(msg)
1387 elif debug_search:
1393 elif debug_search:
1388 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1394 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1389 self._write_debug(msg)
1395 self._write_debug(msg)
1390 if nominateddeltas:
1396 if nominateddeltas:
1391 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1397 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1392 if deltainfo is not None:
1398 if deltainfo is not None:
1393 candidaterevs = groups.send(deltainfo.base)
1399 candidaterevs = groups.send(deltainfo.base)
1394 else:
1400 else:
1395 candidaterevs = next(groups)
1401 candidaterevs = next(groups)
1396
1402
1397 if deltainfo is None:
1403 if deltainfo is None:
1398 dbg_type = b"full"
1404 dbg_type = b"full"
1399 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1405 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1400 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1406 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1401 dbg_type = b"snapshot"
1407 dbg_type = b"snapshot"
1402 else:
1408 else:
1403 dbg_type = b"delta"
1409 dbg_type = b"delta"
1404
1410
1405 if gather_debug:
1411 if gather_debug:
1406 end = util.timer()
1412 end = util.timer()
1407 if dbg_type == b'full':
1413 if dbg_type == b'full':
1408 used_cached = (
1414 used_cached = (
1409 cachedelta is not None
1415 cachedelta is not None
1410 and dbg_try_rounds == 0
1416 and dbg_try_rounds == 0
1411 and dbg_try_count == 0
1417 and dbg_try_count == 0
1412 and cachedelta[0] == nullrev
1418 and cachedelta[0] == nullrev
1413 )
1419 )
1414 else:
1420 else:
1415 used_cached = (
1421 used_cached = (
1416 cachedelta is not None
1422 cachedelta is not None
1417 and dbg_try_rounds == 1
1423 and dbg_try_rounds == 1
1418 and dbg_try_count == 1
1424 and dbg_try_count == 1
1419 and deltainfo.base == cachedelta[0]
1425 and deltainfo.base == cachedelta[0]
1420 )
1426 )
1421 dbg = {
1427 dbg = {
1422 'duration': end - start,
1428 'duration': end - start,
1423 'revision': target_rev,
1429 'revision': target_rev,
1424 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1430 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1425 'search_round_count': dbg_try_rounds,
1431 'search_round_count': dbg_try_rounds,
1426 'using-cached-base': used_cached,
1432 'using-cached-base': used_cached,
1427 'delta_try_count': dbg_try_count,
1433 'delta_try_count': dbg_try_count,
1428 'type': dbg_type,
1434 'type': dbg_type,
1429 'p1-chain-len': p1_chain_len,
1435 'p1-chain-len': p1_chain_len,
1430 'p2-chain-len': p2_chain_len,
1436 'p2-chain-len': p2_chain_len,
1431 }
1437 }
1432 if (
1438 if (
1433 deltainfo.snapshotdepth # pytype: disable=attribute-error
1439 deltainfo.snapshotdepth # pytype: disable=attribute-error
1434 is not None
1440 is not None
1435 ):
1441 ):
1436 dbg[
1442 dbg[
1437 'snapshot-depth'
1443 'snapshot-depth'
1438 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1444 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1439 else:
1445 else:
1440 dbg['snapshot-depth'] = 0
1446 dbg['snapshot-depth'] = 0
1441 target_revlog = b"UNKNOWN"
1447 target_revlog = b"UNKNOWN"
1442 target_type = self.revlog.target[0]
1448 target_type = self.revlog.target[0]
1443 target_key = self.revlog.target[1]
1449 target_key = self.revlog.target[1]
1444 if target_type == KIND_CHANGELOG:
1450 if target_type == KIND_CHANGELOG:
1445 target_revlog = b'CHANGELOG:'
1451 target_revlog = b'CHANGELOG:'
1446 elif target_type == KIND_MANIFESTLOG:
1452 elif target_type == KIND_MANIFESTLOG:
1447 target_revlog = b'MANIFESTLOG:'
1453 target_revlog = b'MANIFESTLOG:'
1448 if target_key:
1454 if target_key:
1449 target_revlog += b'%s:' % target_key
1455 target_revlog += b'%s:' % target_key
1450 elif target_type == KIND_FILELOG:
1456 elif target_type == KIND_FILELOG:
1451 target_revlog = b'FILELOG:'
1457 target_revlog = b'FILELOG:'
1452 if target_key:
1458 if target_key:
1453 target_revlog += b'%s:' % target_key
1459 target_revlog += b'%s:' % target_key
1454 dbg['target-revlog'] = target_revlog
1460 dbg['target-revlog'] = target_revlog
1455
1461
1456 if self._debug_info is not None:
1462 if self._debug_info is not None:
1457 self._debug_info.append(dbg)
1463 self._debug_info.append(dbg)
1458
1464
1459 if self._write_debug is not None:
1465 if self._write_debug is not None:
1460 msg = (
1466 msg = (
1461 b"DBG-DELTAS:"
1467 b"DBG-DELTAS:"
1462 b" %-12s"
1468 b" %-12s"
1463 b" rev=%d:"
1469 b" rev=%d:"
1464 b" delta-base=%d"
1470 b" delta-base=%d"
1465 b" is-cached=%d"
1471 b" is-cached=%d"
1466 b" - search-rounds=%d"
1472 b" - search-rounds=%d"
1467 b" try-count=%d"
1473 b" try-count=%d"
1468 b" - delta-type=%-6s"
1474 b" - delta-type=%-6s"
1469 b" snap-depth=%d"
1475 b" snap-depth=%d"
1470 b" - p1-chain-length=%d"
1476 b" - p1-chain-length=%d"
1471 b" p2-chain-length=%d"
1477 b" p2-chain-length=%d"
1472 b" - duration=%f"
1478 b" - duration=%f"
1473 b"\n"
1479 b"\n"
1474 )
1480 )
1475 msg %= (
1481 msg %= (
1476 dbg["target-revlog"],
1482 dbg["target-revlog"],
1477 dbg["revision"],
1483 dbg["revision"],
1478 dbg["delta-base"],
1484 dbg["delta-base"],
1479 dbg["using-cached-base"],
1485 dbg["using-cached-base"],
1480 dbg["search_round_count"],
1486 dbg["search_round_count"],
1481 dbg["delta_try_count"],
1487 dbg["delta_try_count"],
1482 dbg["type"],
1488 dbg["type"],
1483 dbg["snapshot-depth"],
1489 dbg["snapshot-depth"],
1484 dbg["p1-chain-len"],
1490 dbg["p1-chain-len"],
1485 dbg["p2-chain-len"],
1491 dbg["p2-chain-len"],
1486 dbg["duration"],
1492 dbg["duration"],
1487 )
1493 )
1488 self._write_debug(msg)
1494 self._write_debug(msg)
1489 return deltainfo
1495 return deltainfo
1490
1496
1491
1497
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, body = deltainfo.data
    mode = COMP_MODE_INLINE
    if not header:
        if not body:
            # no data to store at all... declare it uncompressed
            mode = COMP_MODE_PLAIN
        else:
            # no compression header: sniff the first byte of the payload
            first_byte = body[0:1]
            if first_byte == b'\0':
                mode = COMP_MODE_PLAIN
            elif first_byte == default_compression_header:
                mode = COMP_MODE_DEFAULT
    elif header == b'u':
        # the b'u' header marks uncompressed data; recording that through
        # the compression mode is more efficient than keeping the header
        mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now